| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | // |
| 5 | // Provides basic interpreted regular expression matching. This is meant as a debugging tool, |
| 6 | // and if regular expressions become necessary in a non-debug scenario great care should be |
| 7 | // used to ensure that performance is not impaired, and a more thorough review of this could |
| 8 | // would also be a good thing. This file does not include any concrete instantiations but |
| 9 | // instead provides the basic building blocks. Some concrete instantiations can be found in |
| 10 | // regex_util.h. |
| 11 | // |
| 12 | // NOTE: See code:clr::regex::RegExBase (below) for description of supported regex language. |
| 13 | // |
| 14 | // NOTE: we had to forego standard options such as tr1::regex |
| 15 | // (http://en.wikipedia.org/wiki/Technical_Report_1#Regular_Expressions) and Microsoft's |
| 16 | // internal GRETA regular expressions (http://toolbox/sites/987/default.aspx) because they |
| 17 | // both rely heavily on the STL, which can not currently be used within the CLR. |
| 18 | // |
| 19 | // NOTE: If this becomes non-debug-only, then read the comment on WCHARItemTraits for what |
| 20 | // what needs fixing. |
| 21 | // |
| 22 | |
| 23 | // |
| 24 | |
| 25 | #ifndef _REGEX_BASE_H_ |
| 26 | #define _REGEX_BASE_H_ |
| 27 | |
| 28 | // Forward declare namespace so that it is not debug-only (even if currently there is nothing |
| 29 | // but debug-only code in the namespace). This enables a "using namespace clr;" line in a |
| 30 | // header file without having to worry about whether or not it's in a debug-only block. |
| 31 | namespace clr {} |
| 32 | |
| 33 | #ifdef _DEBUG |
| 34 | |
| 35 | #include "utilcode.h" // for string hash functions |
| 36 | #include "sstring.h" |
| 37 | |
| 38 | namespace clr { |
| 39 | namespace regex { |
| 40 | |
| 41 | // Implementation details. Code contained in any "imp" namespace should never be directly used |
| 42 | // by clients of RegEx. |
| 43 | namespace imp { |
| 44 | |
| 45 | //=================================================================================================== |
| 46 | // Helper for clr::regex::RegExBase. See class definition for clr::regex::RegExBase below for more |
| 47 | // information. |
| 48 | |
| 49 | template <typename ITEM_TRAITS, typename GROUP_CONTAINER> |
| 50 | class RegExBaseHelper : protected ITEM_TRAITS |
| 51 | { |
| 52 | public: |
| 53 | typedef typename ITEM_TRAITS::RegexIterator RegexIterator; |
| 54 | typedef typename ITEM_TRAITS::InputIterator InputIterator; |
| 55 | |
| 56 | typedef typename ITEM_TRAITS::MatchFlags MatchFlags; |
| 57 | static const MatchFlags DefaultMatchFlags = ITEM_TRAITS::DefaultMatchFlags; |
| 58 | |
| 59 | // Arguments: |
| 60 | // regex : marks the start of the regular expression string. |
| 61 | // regexEnd : marks the end of the regular expression string. |
| 62 | // input : marks the start of the input string against which regex will be matched. |
| 63 | // inputEnd : marks the end of the input string. |
| 64 | // groups : recipient of regular expression groups. |
| 65 | // |
| 66 | // Returns true if the regular expression was successfully matched against the input string; |
| 67 | // otherwise false. |
| 68 | |
| 69 | RegExBaseHelper(const RegexIterator& regex, |
| 70 | const RegexIterator& regexEnd, |
| 71 | const InputIterator& input, |
| 72 | const InputIterator& inputEnd, |
| 73 | GROUP_CONTAINER& groups, |
| 74 | const MatchFlags flags = DefaultMatchFlags); |
| 75 | |
| 76 | // The main entrypoint to RegExBaseHelper, Match will attempt to match the regular expression |
| 77 | // defined by [regex,regexEnd) against the input defined by [input,inputEnd). |
| 78 | bool Match() |
| 79 | { WRAPPER_NO_CONTRACT; return DoMatch(m_regex, m_input); } |
| 80 | |
| 81 | protected: |
| 82 | typedef typename ITEM_TRAITS::Item Item; |
| 83 | typedef typename ITEM_TRAITS::ItemType ItemType; |
| 84 | |
| 85 | // Try to match regex at any point within input, starting with the first character and moving |
| 86 | // along one at a time until a match is found or the end of the input is encountered, whichever |
| 87 | // comes first. |
| 88 | bool DoMatch( |
| 89 | const RegexIterator& regex, |
| 90 | InputIterator input); |
| 91 | |
| 92 | // Try to match regex starting exactly at input. |
| 93 | bool DoMatchHere( |
| 94 | const RegexIterator& regex, |
| 95 | const InputIterator& input); |
| 96 | |
| 97 | // The function returns true if a match is found consisting of zero or more items c followed by a |
| 98 | // successful match of regex on the remaining input; otherwise false is returned. This is a |
| 99 | // conservative match, so it starts with trying to match zero items followed by regex, |
| 100 | // and will then try to match one item followed by regex. |
| 101 | bool DoMatchStar( |
| 102 | const Item& c, |
| 103 | const RegexIterator& regex, |
| 104 | InputIterator input); |
| 105 | |
| 106 | // The function returns true if a match is found consisting of zero or more items c followed by a |
| 107 | // successful match of regex on the remaining input; otherwise false is returned. This is a |
| 108 | // greedy match, so it starts with trying to match as many items as it can followed by regex, |
| 109 | // and on failure will try again with one less items matched. |
| 110 | bool DoMatchStarEagerly( |
| 111 | const Item& c, |
| 112 | const RegexIterator& regex, |
| 113 | InputIterator input); |
| 114 | |
| 115 | // Convenience method. |
| 116 | Item GetItem( |
| 117 | const RegexIterator ®ex) |
| 118 | { WRAPPER_NO_CONTRACT; return ITEM_TRAITS::GetItem(regex, m_regexEnd, m_flags); } |
| 119 | |
| 120 | // Convenience method. |
| 121 | bool MatchItem( |
| 122 | const Item& c, |
| 123 | const InputIterator& input) |
| 124 | { WRAPPER_NO_CONTRACT; return ITEM_TRAITS::MatchItem(c, input, m_inputEnd, m_flags); } |
| 125 | |
| 126 | // Declared as protected to prevent direct instantiation. |
| 127 | RegExBaseHelper() |
| 128 | {} |
| 129 | |
| 130 | RegexIterator m_regex; |
| 131 | RegexIterator m_regexEnd; |
| 132 | InputIterator m_input; |
| 133 | InputIterator m_inputEnd; |
| 134 | GROUP_CONTAINER& m_groups; |
| 135 | MatchFlags m_flags; |
| 136 | }; |
| 137 | |
| 138 | //--------------------------------------------------------------------------------------------------- |
| 139 | // This method simply stores the end iterators for the regular expression and the input strings, as |
| 140 | // well as the group collection object and flags, and forwards the call to DoMatch. |
| 141 | |
| 142 | template <typename ITEM_TRAITS, typename GROUP_CONTAINER> |
| 143 | RegExBaseHelper<ITEM_TRAITS, GROUP_CONTAINER>::RegExBaseHelper( |
| 144 | const RegexIterator& regex, |
| 145 | const RegexIterator& regexEnd, |
| 146 | const InputIterator& input, |
| 147 | const InputIterator& inputEnd, |
| 148 | GROUP_CONTAINER& groups, |
| 149 | const MatchFlags flags) |
| 150 | : m_regex(regex), |
| 151 | m_regexEnd(regexEnd), |
| 152 | m_input(input), |
| 153 | m_inputEnd(inputEnd), |
| 154 | m_groups(groups), |
| 155 | m_flags(flags) |
| 156 | { WRAPPER_NO_CONTRACT; } |
| 157 | |
| 158 | //--------------------------------------------------------------------------------------------------- |
| 159 | // This method checks if the regular expression starts with a caret, indicating that any match must |
| 160 | // be anchored at the start of the input string. If such a caret exists, one match attempt is made |
| 161 | // on the input starting with the first character and the result is returned. If the regex does not |
| 162 | // start with a caret, the method attempts to match against the input string, starting at the first |
| 163 | // character and moving one character over for each successive attempt, until a match is found or |
| 164 | // the end of the input is encountered, whichever comes first. |
| 165 | |
| 166 | template <typename ITEM_TRAITS, typename GROUP_CONTAINER> |
| 167 | inline bool |
| 168 | RegExBaseHelper<ITEM_TRAITS, GROUP_CONTAINER>::DoMatch( |
| 169 | const RegexIterator& regex, |
| 170 | InputIterator input) |
| 171 | { |
| 172 | WRAPPER_NO_CONTRACT; |
| 173 | |
| 174 | if (GetItem(regex).GetType() == ITEM_TRAITS::CARET) |
| 175 | { // Match must occur from the beginning of the line |
| 176 | m_groups.OpenGroup(input, m_inputEnd); |
| 177 | bool res = DoMatchHere(regex+1, input); |
| 178 | if (!res) |
| 179 | m_groups.CancelGroup(); |
| 180 | return res; |
| 181 | } |
| 182 | else |
| 183 | { // Match can happen anywhere in the string |
| 184 | do |
| 185 | { // Attempt to match against each substring [x,inputEnd) for x = 0...inputEnd |
| 186 | |
| 187 | // Begin the group that contains the entire match. |
| 188 | m_groups.OpenGroup(input, m_inputEnd); |
| 189 | |
| 190 | if (DoMatchHere(regex, input)) |
| 191 | { // Success. Note that the entire match group is closed elsewhere on a |
| 192 | // successful match. |
| 193 | return true; |
| 194 | } |
| 195 | |
| 196 | // On failure, cancel the group so that it can be reopened on the new substring |
| 197 | m_groups.CancelGroup(); |
| 198 | } while (input++ != m_inputEnd); |
| 199 | } |
| 200 | |
| 201 | // No successful match found. |
| 202 | return false; |
| 203 | } |
| 204 | |
| 205 | //------------------------------------------------------------------------------------------------------- |
| 206 | // This is the main loop, which handles grouping constructs, repetition directives (*, *?, +, +?), and |
| 207 | // EOL matches ($), delegating all character matching to ITEM_TRAITS::MatchItem |
| 208 | // The general algorithm is: |
| 209 | // 1. Get the next item. |
| 210 | // 2. If the item is a DOLLAR type, check to see if we're at the end of the retular expression and |
| 211 | // the input string, and if so return success. |
| 212 | // 3. If the item is a grouping construct, open or close the appropriate group and continue matching. |
| 213 | // On failure, roll back the grouping change so that subsequent attemts will have correct state. |
| 214 | // 4. Check to see if the item following the current is a repetition directive, and if so take the |
| 215 | // appropriate action. |
| 216 | // 5. Otherwise defer to ITEM_TRAITS::MatchItem and if successful continue to match the remaining |
| 217 | // regular expression and input string; otherwise return failure. |
| 218 | |
| 219 | template <typename ITEM_TRAITS, typename GROUP_CONTAINER> |
| 220 | inline bool |
| 221 | RegExBaseHelper<ITEM_TRAITS, GROUP_CONTAINER>::DoMatchHere( |
| 222 | const RegexIterator& regex, |
| 223 | const InputIterator& input) |
| 224 | { |
| 225 | WRAPPER_NO_CONTRACT; |
| 226 | |
| 227 | if (regex == m_regexEnd) |
| 228 | { // Reached the end of the regular expression without ever returning false, |
| 229 | // implying a successful match. Close the overall match group and return. |
| 230 | m_groups.CloseGroup(input); |
| 231 | return true; |
| 232 | } |
| 233 | |
| 234 | Item c0 = GetItem(regex); |
| 235 | if (c0.GetType() == ITEM_TRAITS::DOLLAR && (c0.GetNext() == m_regexEnd)) |
| 236 | { // Matches EOL if a '$' is encountered at the end of the input. |
| 237 | m_groups.CloseGroup(input); |
| 238 | // Success only if we're actually at the end of the input string. |
| 239 | return input == m_inputEnd; |
| 240 | } |
| 241 | if (c0.GetType() == ITEM_TRAITS::PAREN_OPEN) |
| 242 | { // Encountered an open parenthesis ('('); open a new grouping. |
| 243 | m_groups.OpenGroup(input, m_inputEnd); |
| 244 | bool res = DoMatchHere(c0.GetNext(), input); |
| 245 | if (!res) |
| 246 | { // If a match fails, there could be further attempts (such as if |
| 247 | // there is an active repetition matching frame beneath us), so |
| 248 | // need to cancel the group we just opened so that the grouping |
| 249 | // structure remains consistent. |
| 250 | m_groups.CancelGroup(); |
| 251 | } |
| 252 | return res; |
| 253 | } |
| 254 | if (c0.GetType() == ITEM_TRAITS::PAREN_CLOSE) |
| 255 | { // Close the most recent open grouping. |
| 256 | COUNT_T i = m_groups.CloseGroup(input); |
| 257 | bool res = DoMatchHere(c0.GetNext(), input); |
| 258 | if (!res) |
| 259 | { // For the same reasons as the need to cancel an opened group |
| 260 | // explained above, we need to reopen the closed group if a |
| 261 | // match fails. |
| 262 | m_groups.ReopenGroup(i, m_inputEnd); |
| 263 | } |
| 264 | return res; |
| 265 | } |
| 266 | |
| 267 | if (c0.GetNext() != m_regexEnd) |
| 268 | { // If there is another item in the regex string following the current one, get |
| 269 | // it to see if it is a repetition matching directive. |
| 270 | Item c1 = GetItem(c0.GetNext()); |
| 271 | if (c1.GetType() == ITEM_TRAITS::STAR) |
| 272 | { // '*' matching directive encountered |
| 273 | if (c1.GetNext() != m_regexEnd) |
| 274 | { |
| 275 | Item c2 = GetItem(c1.GetNext()); |
| 276 | if (c2.GetType() == ITEM_TRAITS::QUESTION_MARK) |
| 277 | { // conservative matching semantics requested |
| 278 | return DoMatchStar(c0, c2.GetNext(), input); |
| 279 | } |
| 280 | } |
| 281 | // Eager matching |
| 282 | return DoMatchStarEagerly(c0, c1.GetNext(), input); |
| 283 | } |
| 284 | if (c1.GetType() == ITEM_TRAITS::PLUS) |
| 285 | { // '+' matching directive encountered. |
| 286 | if (c1.GetNext() != m_regexEnd) |
| 287 | { |
| 288 | Item c2 = GetItem(c1.GetNext()); |
| 289 | if (c2.GetType() == ITEM_TRAITS::QUESTION_MARK) |
| 290 | { // conservative matching semantics requested |
| 291 | return MatchItem(c0, input) && DoMatchStar(c0, c2.GetNext(), input+1); |
| 292 | } |
| 293 | } |
| 294 | // Eager matching |
| 295 | return MatchItem(c0, input) && DoMatchStarEagerly(c0, c1.GetNext(), input+1); |
| 296 | } |
| 297 | if (c1.GetType() == ITEM_TRAITS::QUESTION_MARK) |
| 298 | { // '?' matching directive encountered |
| 299 | return (MatchItem(c0, input) && DoMatchHere(c1.GetNext(), input+1)) || DoMatchHere(c1.GetNext(), input); |
| 300 | } |
| 301 | } |
| 302 | |
| 303 | // No special matching semantics encountered, delegate the matching to ITEM_TRAITS::MatchItem |
| 304 | return MatchItem(c0, input) && DoMatchHere(c0.GetNext(), input+1); |
| 305 | } |
| 306 | |
| 307 | //------------------------------------------------------------------------------------------------------- |
| 308 | // Conservative '*' repetition matching. This attempts to match zero items c followed by a match of |
| 309 | // regex. If this fails, attempt to match one item c followed by a match of regex. Repeat until item c |
| 310 | // does not match or a successful match is found. |
| 311 | |
| 312 | template <typename ITEM_TRAITS, typename GROUP_CONTAINER> |
| 313 | inline bool |
| 314 | RegExBaseHelper<ITEM_TRAITS, GROUP_CONTAINER>::DoMatchStar( |
| 315 | const Item& c, |
| 316 | const RegexIterator& regex, |
| 317 | InputIterator input) |
| 318 | { |
| 319 | WRAPPER_NO_CONTRACT; |
| 320 | CONSISTENCY_CHECK(input != m_inputEnd); |
| 321 | |
| 322 | do { |
| 323 | if (DoMatchHere(regex, input)) |
| 324 | { // A successful match is found! |
| 325 | return true; |
| 326 | } |
| 327 | // No successful match, so try to match one more item and then attempt to match regex on the |
| 328 | // remaining input. |
| 329 | } while (input != m_inputEnd && MatchItem(c, input++)); |
| 330 | return false; |
| 331 | } |
| 332 | |
| 333 | //------------------------------------------------------------------------------------------------------- |
| 334 | // Similar to DoMatchStar above, except this algorithm matches as many items c as possible first followed |
| 335 | // by regex on the remaining input, and on failure tries again with a match against one less item c |
| 336 | // followed by regex on the remaining input, and repeats until there are no items c remaining to match |
| 337 | // and the zero item match followed by a match of regex on the entire remaining input fails. If any of |
| 338 | // the match attempts succeed, return success. |
| 339 | |
| 340 | template <typename ITEM_TRAITS, typename GROUP_CONTAINER> |
| 341 | inline bool |
| 342 | RegExBaseHelper<ITEM_TRAITS, GROUP_CONTAINER>::DoMatchStarEagerly( |
| 343 | const Item& c, |
| 344 | const RegexIterator& regex, |
| 345 | InputIterator input) |
| 346 | { |
| 347 | WRAPPER_NO_CONTRACT; |
| 348 | |
| 349 | // Make sure we keep a hold of how far back we can unwind. |
| 350 | InputIterator inputOrig = input; |
| 351 | |
| 352 | // First, determine the maximum number of matches against item c. |
| 353 | while (input != m_inputEnd && MatchItem(c, input)) |
| 354 | { |
| 355 | ++input; |
| 356 | } |
| 357 | |
| 358 | do |
| 359 | { // Work backwards from the maximum number of matches of item c until a match is found |
| 360 | // or until we have backed right up to the starting value of input (saved in inputOrig), |
| 361 | // at which time we admit failure. |
| 362 | if (DoMatchHere(regex, input)) |
| 363 | return true; |
| 364 | } while (inputOrig != input--); |
| 365 | return false; |
| 366 | } |
| 367 | |
| 368 | } // namespace imp |
| 369 | |
| 370 | //======================================================================================================= |
| 371 | // Represents a matched group using iterators to denote the string contained by [Begin(),End()). |
| 372 | |
| 373 | template<typename INPUT_ITERATOR> |
| 374 | class Group |
| 375 | { |
| 376 | public: |
| 377 | typedef INPUT_ITERATOR InputIterator; |
| 378 | |
| 379 | // |
| 380 | // Functions for accessing group properties |
| 381 | // |
| 382 | |
| 383 | // Returns the iterator indicating the start of the group |
| 384 | const InputIterator& Begin() const |
| 385 | { LIMITED_METHOD_CONTRACT; return m_begin; } |
| 386 | |
| 387 | // Returns the iterator indicating the first non-member of the group |
| 388 | const InputIterator& End() const |
| 389 | { LIMITED_METHOD_CONTRACT; return m_end; } |
| 390 | |
| 391 | // It is possible that m_end - m_begin could be greater than the maximum of COUNT_T. m_end and |
| 392 | // m_begin are the end and start of a string, so is entirely unlikely to overflow a COUNT_T. |
| 393 | // Conbined with the fact that this is debug-only code, opting not to replace all COUNT_T |
| 394 | // uses with SIZE_T. |
| 395 | COUNT_T Length() const |
| 396 | { WRAPPER_NO_CONTRACT; return static_cast<COUNT_T>(m_end - m_begin); } |
| 397 | |
| 398 | // |
| 399 | // Functions used by RegExBaseHelper to create grouping constructs. |
| 400 | // |
| 401 | |
| 402 | Group() |
| 403 | : m_isClosed(false), m_begin(), m_end() |
| 404 | { WRAPPER_NO_CONTRACT; } |
| 405 | |
| 406 | Group(const InputIterator& start, const InputIterator& end, bool isClosed = false) |
| 407 | : m_isClosed(isClosed), m_begin(start), m_end(end) |
| 408 | { WRAPPER_NO_CONTRACT; } |
| 409 | |
| 410 | void SetBegin(const InputIterator& start) |
| 411 | { WRAPPER_NO_CONTRACT; m_begin = start; } |
| 412 | |
| 413 | void SetEnd(const InputIterator& end) |
| 414 | { WRAPPER_NO_CONTRACT; m_end = end; } |
| 415 | |
| 416 | bool IsClosed() const |
| 417 | { LIMITED_METHOD_CONTRACT; return m_isClosed; } |
| 418 | |
| 419 | void SetIsClosed(bool isClosed) |
| 420 | { WRAPPER_NO_CONTRACT; m_isClosed = isClosed; } |
| 421 | |
| 422 | protected: |
| 423 | bool m_isClosed; |
| 424 | InputIterator m_begin; |
| 425 | InputIterator m_end; |
| 426 | }; |
| 427 | |
| 428 | //======================================================================================================= |
| 429 | // Represents a generic container of groups, defaulting to using Group<INPUT_ITERATOR> as its element |
| 430 | // type. This container satisfies the method requrements of RegExBase. When a match is successful, the |
| 431 | // match groups may be accessed using the index operator or the iterators definin the matched groups |
| 432 | // [Begin(), End()). |
| 433 | |
| 434 | template <typename INPUT_ITERATOR, typename GROUP_TYPE = Group<INPUT_ITERATOR> > |
| 435 | class GroupContainer |
| 436 | { |
| 437 | public: |
| 438 | typedef typename SArray<GROUP_TYPE>::Iterator Iterator; |
| 439 | |
| 440 | // |
| 441 | // Functions for enumerating groups |
| 442 | // |
| 443 | |
| 444 | GROUP_TYPE & operator[](COUNT_T idx) |
| 445 | { |
| 446 | WRAPPER_NO_CONTRACT; |
| 447 | CONSISTENCY_CHECK(((COUNT_T)(COUNT_T)idx) == idx); |
| 448 | return m_array[idx]; |
| 449 | } |
| 450 | |
| 451 | // Returns an iterator to the first matched group (which is always the string for the |
| 452 | // entire successfully matched string. Specific groups start at Begin()+1 and continue |
| 453 | // to End()-1. |
| 454 | Iterator Begin() |
| 455 | { WRAPPER_NO_CONTRACT; return m_array.Begin(); } |
| 456 | |
| 457 | // Returns the first invalid iterator value. |
| 458 | Iterator End() |
| 459 | { WRAPPER_NO_CONTRACT; return m_array.End(); } |
| 460 | |
| 461 | // |
| 462 | COUNT_T Count() const |
| 463 | { WRAPPER_NO_CONTRACT; return m_array.GetCount(); } |
| 464 | |
| 465 | // |
| 466 | // Functions used by RegExBaseHelper to create grouping constructs. |
| 467 | // |
| 468 | |
| 469 | // Note: OpenGroup takes an end iterator so that the group will have a valid (if possibly |
| 470 | // incorrect) endpoint in the case that the regular expression has unbalanced grouping |
| 471 | // parentheses. |
| 472 | void OpenGroup(const INPUT_ITERATOR& start, const INPUT_ITERATOR& end) |
| 473 | { WRAPPER_NO_CONTRACT; m_array.Append(GROUP_TYPE(start, end, false)); } |
| 474 | |
| 475 | COUNT_T CloseGroup(const INPUT_ITERATOR& end); |
| 476 | |
| 477 | void ReopenGroup(COUNT_T i, const INPUT_ITERATOR& end); |
| 478 | |
| 479 | void CancelGroup() |
| 480 | { WRAPPER_NO_CONTRACT; m_array.Delete(m_array.End() - 1); } |
| 481 | |
| 482 | private: |
| 483 | SArray<GROUP_TYPE> m_array; |
| 484 | }; |
| 485 | |
| 486 | //------------------------------------------------------------------------------------------------------- |
| 487 | // Works backwards from the most recently created group looking for an open group to close. Returns |
| 488 | // the index of the group that was closed, which is used in the event that a group needs to be |
| 489 | // reopened. |
| 490 | |
| 491 | template <typename INPUT_ITERATOR, typename GROUP_TYPE> |
| 492 | COUNT_T |
| 493 | GroupContainer<INPUT_ITERATOR, GROUP_TYPE>::CloseGroup( |
| 494 | const INPUT_ITERATOR& end) |
| 495 | { |
| 496 | WRAPPER_NO_CONTRACT; |
| 497 | |
| 498 | for (COUNT_T i = (COUNT_T)Count(); i > 0; --i) |
| 499 | { |
| 500 | if (!m_array[i-1].IsClosed()) |
| 501 | { |
| 502 | m_array[i-1].SetEnd(end); |
| 503 | m_array[i-1].SetIsClosed(true); |
| 504 | return i-1; |
| 505 | } |
| 506 | } |
| 507 | |
| 508 | _ASSERTE(!"Unmatched grouping constructs!" ); |
| 509 | return 0; |
| 510 | } |
| 511 | |
| 512 | //------------------------------------------------------------------------------------------------------- |
| 513 | // Reopen a group at the given index, using 'end' to overwrite the current end. |
| 514 | |
| 515 | template <typename INPUT_ITERATOR, typename GROUP_TYPE> |
| 516 | void |
| 517 | GroupContainer<INPUT_ITERATOR, GROUP_TYPE>::ReopenGroup( |
| 518 | COUNT_T i, |
| 519 | const INPUT_ITERATOR& end) |
| 520 | { |
| 521 | WRAPPER_NO_CONTRACT; |
| 522 | CONSISTENCY_CHECK(i > 0 && i < Count()); |
| 523 | |
| 524 | if (i > 0 && i < Count()) |
| 525 | { |
| 526 | m_array[i].SetEnd(end); |
| 527 | m_array[i].SetIsClosed(false); |
| 528 | } |
| 529 | } |
| 530 | |
| 531 | //======================================================================================================= |
| 532 | // Empty group container that satisfies the method requirements of RegExBase but has empty bodies. This |
| 533 | // allows for non-allocating matches when grouping is not required. |
| 534 | |
| 535 | template <typename INPUT_ITERATOR> |
| 536 | class NullGroupContainer |
| 537 | { |
| 538 | public: |
| 539 | void OpenGroup(INPUT_ITERATOR, INPUT_ITERATOR) {} |
| 540 | COUNT_T CloseGroup(INPUT_ITERATOR) { return 0; } |
| 541 | void ReopenGroup(COUNT_T, INPUT_ITERATOR) {} |
| 542 | void CancelGroup() {} |
| 543 | }; |
| 544 | |
| 545 | //======================================================================================================= |
| 546 | // This mini-implementation of regular expression matching supports the |
| 547 | // following constructs: |
| 548 | // ^ matches the beginning of the input string |
| 549 | // $ matches the end of the input string |
| 550 | // * matches zero or more occurrences of the previous item eagerly |
| 551 | // *? matches zero or more occurrences of the previous item conservatively |
| 552 | // + matches 1 or more occurrences of the previous item eagerly |
| 553 | // +? matches 1 or more occurrences of the previous item conservatively |
| 554 | // ? matches 0 or 1 occurences of the previous item |
| 555 | // ( starts a grouping |
| 556 | // ) ends a grouping |
| 557 | // |
| 558 | // IMPORTANT: These are just anchoring and grouping constructs. See the definition for ItemTraitsBase |
| 559 | // below for information on the default character classes that are supported. (The intent of |
| 560 | // this separation is to allow customization of the character classes where required.) |
| 561 | |
| 562 | // ITEM_TRAITS provides traits for individual tokens in a regular expression, as well as a mechanism for |
| 563 | // matching said individual components with the target string. RegexBase derives from ITEM_TRAITS in a |
| 564 | // protected fashion, and is responsible for providing the following: |
| 565 | // 1. "RegexIterator" typedef |
| 566 | // Used as an iterator into the regular expression, and used as arguments to indicate the start |
| 567 | // and the end of the regular expression string. |
| 568 | // 2. "InputIterator" typedef |
| 569 | // Used as an iterator into the input string, and used as arguments to indicate the start |
| 570 | // and the end of the input string. |
| 571 | // (NOTE: RegexIterator and InputIterator are often typedef'ed to be the same thing.) |
| 572 | // 3. "Item" typedef. |
| 573 | // This will be used with methods GetItem and MatchItem (see below). Item must |
| 574 | // define the the following methods: |
| 575 | // ItemType GetType() : returns the type of the item. See below for explanation of ItemType |
| 576 | // const RegexIterator& GetNext() : iterator pointing to the start of the next item. |
| 577 | // 4. "MatchFlags" typedef, and "static const DefaultMatchFlags" value. |
| 578 | // Provided for calls to "Match" and "Matches", and passed on to calls "GetItem" and "MatchItem". |
| 579 | // 5. enum ItemType |
| 580 | // Defines the following minimum values: |
| 581 | // DOT |
| 582 | // CARET |
| 583 | // DOLLAR |
| 584 | // STAR |
| 585 | // QUESTION_MARK |
| 586 | // PLUS |
| 587 | // PAREN_OPEN |
| 588 | // PAREN_CLOSE |
| 589 | // ItemType may include more values, and may even choose to ignore the above enum types, all of |
| 590 | // which must be recognized by GetItem and MatchItem (see below). |
| 591 | // 6. static Item GetItem(const RegexIterator& regex, |
| 592 | // const RegexIterator& regexEnd, |
| 593 | // const MatchFlags& flags) |
| 594 | // This method takes a regular expression iterator and returns the next regular expression |
| 595 | // element (Item) pointed to by the iterator. |
| 596 | // 7. static bool MatchItem(const Item& c, |
| 597 | // const InputIterator& input, |
| 598 | // const InputIterator& inputEnd, |
| 599 | // const MatchFlags &flags) |
| 600 | |
| 601 | // GROUP_CONTAINER provides functionality for keeping track of regular expression groups. This is a generic |
| 602 | // argument to Match, and the type of the object must support the following methods: |
| 603 | // 1. void OpenGroup(const InputIterator& start, const InputIterator& end); |
| 604 | // Called when a PAREN_OPEN item is encountered. |
| 605 | // 2. COUNT_T CloseGroup(const InputIterator& end); |
| 606 | // Called when a PAREN_CLOSE item is encountered. Returns the index of the group that was closed. |
| 607 | // 3. void ReopenGroup(COUNT_T i, const InputIterator& end); |
| 608 | // Called when a match following a call to CloseGroup fails, essentially requesting a rollback |
| 609 | // of the call to CloseGroup. |
| 610 | // 4. void CancelGroup(); |
| 611 | // Called when a match following a call to OpenGroup fails, essentially requesting a rollback |
| 612 | // of the call to OpenGroup. |
| 613 | |
| 614 | template <typename ITEM_TRAITS> |
| 615 | class RegExBase : public ITEM_TRAITS |
| 616 | { |
| 617 | public: |
| 618 | typedef typename ITEM_TRAITS::RegexIterator RegexIterator; |
| 619 | typedef typename ITEM_TRAITS::InputIterator InputIterator; |
| 620 | |
| 621 | // This is a convenience typedef that allows a caller to easily declare a grouping container |
| 622 | // to be passed to a call to Match. An example would be (see regex_util.h for a definition of |
| 623 | // SStringRegEx): |
| 624 | // |
| 625 | // SString input(SL"Simmons"); |
| 626 | // SStringRegEx::GroupingContainer container; |
| 627 | // if (SStringRegEx::Match(SL"(Sim+on)", input, container)) { |
| 628 | // printf("%S", container[1].GetSString(input).GetUnicode()); |
| 629 | // } |
| 630 | // |
| 631 | typedef GroupContainer<InputIterator, Group<InputIterator> > GroupingContainer; |
| 632 | |
| 633 | // Pulls down the typedef for MatchFlags and initialized a static representing the default flags. |
| 634 | typedef typename ITEM_TRAITS::MatchFlags MatchFlags; |
| 635 | static const MatchFlags DefaultMatchFlags = ITEM_TRAITS::DefaultMatchFlags; |
| 636 | |
| 637 | template <typename GROUP_CONTAINER> |
| 638 | static bool Match(RegexIterator regex, |
| 639 | RegexIterator regexEnd, |
| 640 | InputIterator input, |
| 641 | InputIterator inputEnd, |
| 642 | GROUP_CONTAINER& groups, |
| 643 | MatchFlags flags = DefaultMatchFlags) |
| 644 | { |
| 645 | imp::RegExBaseHelper<ITEM_TRAITS, GROUP_CONTAINER> |
| 646 | re(regex, regexEnd, input, inputEnd, groups, flags); |
| 647 | return re.Match(); |
| 648 | } |
| 649 | |
| 650 | static bool Matches(RegexIterator regex, |
| 651 | RegexIterator regexEnd, |
| 652 | InputIterator input, |
| 653 | InputIterator inputEnd, |
| 654 | MatchFlags flags = DefaultMatchFlags) |
| 655 | { |
| 656 | NullGroupContainer<InputIterator> ngc; |
| 657 | return Match(regex, regexEnd, input, inputEnd, ngc, flags); |
| 658 | } |
| 659 | }; |
| 660 | |
| 661 | //======================================================================================================= |
| 662 | // In addition to requirements specified on RegExBase, StandardItemTraits provides the following |
| 663 | // additinal regular expression item types. |
| 664 | // c matches any literal character c |
| 665 | // . matches any single character |
| 666 | // \d any literal digit character |
| 667 | // \w any alpha character |
| 668 | // \s any whitespace character |
| 669 | // |
| 670 | // Child types of ItemTraitsBase must implement GetItem and MatchItem (see below for full |
| 671 | // signature requirements). Current child type implementations permit a backslash ('\') to escape |
| 672 | // special characters ('.', '$', '*', etc.) and allow them to be interpreted as literal characters. |
| 673 | // |
| 674 | // This type describes a particular behaviour, but must be subtyped for the particular target character |
| 675 | // needed, and GetItem and MatchItem must be implemented. |
| 676 | // |
| 677 | |
| 678 | template <typename ITERATOR_TYPE, typename CHAR_TYPE> |
| 679 | class ItemTraitsBase |
| 680 | { |
| 681 | public: |
| 682 | typedef ITERATOR_TYPE RegexIterator; |
| 683 | typedef ITERATOR_TYPE InputIterator; |
| 684 | |
| 685 | enum MatchFlags |
| 686 | { |
| 687 | MF_NONE = 0x00, |
| 688 | MF_CASE_INSENSITIVE = 0x01 // Match character literals as case insensitive. |
| 689 | }; |
| 690 | |
| 691 | static const MatchFlags DefaultMatchFlags = MF_NONE; |
| 692 | |
| 693 | protected: |
| 694 | ItemTraitsBase() |
| 695 | {} |
| 696 | |
| 697 | enum ItemType |
| 698 | { |
| 699 | // REQUIRED, as described in RegExBase |
| 700 | CARET, |
| 701 | DOLLAR, |
| 702 | STAR, |
| 703 | QUESTION_MARK, |
| 704 | PLUS, |
| 705 | PAREN_OPEN, |
| 706 | PAREN_CLOSE, |
| 707 | // ADDITIONAL |
| 708 | DOT, // any literal character |
| 709 | DIGIT, // any digit |
| 710 | ALPHA, // any alpha character, upper or lower case |
| 711 | WHITESPACE, // any whitespace character |
| 712 | NON_WHITESPACE, // any non-whitespace character |
| 713 | CHARACTER, // a specific literal character |
| 714 | }; |
| 715 | |
| 716 | class Item |
| 717 | { |
| 718 | public: |
| 719 | Item(ItemType type, CHAR_TYPE val, const RegexIterator& next) |
| 720 | : m_type(type), m_val(val), m_next(next) |
| 721 | { WRAPPER_NO_CONTRACT; } |
| 722 | |
| 723 | Item(ItemType type, const RegexIterator& next) |
| 724 | : m_type(type), m_val(0), m_next(next) |
| 725 | { WRAPPER_NO_CONTRACT; } |
| 726 | |
| 727 | ItemType GetType() const |
| 728 | { LIMITED_METHOD_CONTRACT; return m_type; } |
| 729 | |
| 730 | const RegexIterator& GetNext() const |
| 731 | { LIMITED_METHOD_CONTRACT; return m_next; } |
| 732 | |
| 733 | CHAR_TYPE GetValue() const |
| 734 | { LIMITED_METHOD_CONTRACT; return m_val; } |
| 735 | |
| 736 | protected: |
| 737 | ItemType m_type; |
| 738 | CHAR_TYPE m_val; |
| 739 | RegexIterator m_next; |
| 740 | }; |
| 741 | |
| 742 | // All deriving types must add the following methods: |
| 743 | // static Item GetItem(const RegexIterator& regex, const RegexIterator& regexEnd); |
| 744 | // static bool MatchItem(const Item& c, const InputIterator& input, const InputIterator& inputEnd); |
| 745 | }; |
| 746 | |
| 747 | //======================================================================================================= |
| 748 | // Implements ItemTraitsBase, provides matching for UNICODE characters. |
| 749 | // |
| 750 | // !!!IMPORTANT!!! |
| 751 | // This is not a complete unicode implementation - only the equivalent of ASCII alpha characters are |
| 752 | // consider to be part of the alpha set, and this is also the only set on which case insensitive |
| 753 | // operations will correctly work. If RegEx is moved out of DEBUG ONLY, then this will have to be fixed |
| 754 | // to properly address these issues. |
| 755 | // !!!IMPORTANT!!! |
| 756 | |
| 757 | template <typename ITERATOR_TYPE> |
| 758 | class WCHARItemTraits : public ItemTraitsBase<ITERATOR_TYPE, WCHAR> |
| 759 | { |
| 760 | public: |
| 761 | typedef ItemTraitsBase<ITERATOR_TYPE, WCHAR> PARENT_TYPE; |
| 762 | typedef typename PARENT_TYPE::RegexIterator RegexIterator; |
| 763 | typedef typename PARENT_TYPE::InputIterator InputIterator; |
| 764 | typedef typename PARENT_TYPE::Item Item; |
| 765 | typedef typename PARENT_TYPE::MatchFlags MatchFlags; |
| 766 | |
| 767 | static Item GetItem(const RegexIterator& regex, const RegexIterator& regexEnd, MatchFlags flags); |
| 768 | static bool MatchItem(const Item& c, const InputIterator& input, const InputIterator& inputEnd, MatchFlags flags); |
| 769 | |
| 770 | protected: |
| 771 | WCHARItemTraits() |
| 772 | {} |
| 773 | |
| 774 | private: |
| 775 | static inline bool IS_UPPER_A_TO_Z(WCHAR x) |
| 776 | { WRAPPER_NO_CONTRACT; return (((x) >= W('A')) && ((x) <= W('Z'))); } |
| 777 | |
| 778 | static inline bool IS_LOWER_A_TO_Z(WCHAR x) |
| 779 | { WRAPPER_NO_CONTRACT; return (((x) >= W('a')) && ((x) <= W('z'))); } |
| 780 | |
| 781 | static inline WCHAR UPCASE(WCHAR x) |
| 782 | { WRAPPER_NO_CONTRACT; return (IS_LOWER_A_TO_Z(x) ? ((x) - W('a') + W('A')) : (x)); } |
| 783 | |
| 784 | static inline WCHAR DOWNCASE(WCHAR x) |
| 785 | { WRAPPER_NO_CONTRACT; return (IS_UPPER_A_TO_Z(x) ? ((x) - W('A') + W('a')) : (x)); } |
| 786 | |
| 787 | static bool MatchCharacter(WCHAR c1, WCHAR c2, MatchFlags flags) |
| 788 | { WRAPPER_NO_CONTRACT; return (flags & PARENT_TYPE::MF_CASE_INSENSITIVE) ? (DOWNCASE(c1) == DOWNCASE(c2)) : (c1 == c2); } |
| 789 | }; |
| 790 | |
| 791 | //------------------------------------------------------------------------------------------------------- |
| 792 | // Reads the next item from regex, recognizing special characters outlined in ItemTraitsBase. |
| 793 | |
| 794 | template <typename ITERATOR_TYPE> |
| 795 | typename WCHARItemTraits<ITERATOR_TYPE>::Item |
| 796 | WCHARItemTraits<ITERATOR_TYPE>::GetItem( |
| 797 | const RegexIterator& regex, |
| 798 | const RegexIterator& regexEnd, |
| 799 | MatchFlags flags) |
| 800 | { |
| 801 | WRAPPER_NO_CONTRACT; |
| 802 | |
| 803 | if (*regex == W('\\')) |
| 804 | { |
| 805 | const RegexIterator regexNext = regex+1; |
| 806 | if (regexNext == regexEnd) |
| 807 | return Item(PARENT_TYPE::CHARACTER, W('\\'), regexNext); |
| 808 | if (*regexNext == W('d')) |
| 809 | return Item(PARENT_TYPE::DIGIT, regexNext+1); |
| 810 | if (*regexNext == W('w')) |
| 811 | return Item(PARENT_TYPE::ALPHA, regexNext+1); |
| 812 | if (*regexNext == W('s')) |
| 813 | return Item(PARENT_TYPE::WHITESPACE, regexNext+1); |
| 814 | if (*regexNext == W('S')) |
| 815 | return Item(PARENT_TYPE::NON_WHITESPACE, regexNext+1); |
| 816 | return Item(PARENT_TYPE::CHARACTER, *regexNext, regexNext+1); |
| 817 | } |
| 818 | else if (*regex == W('.')) |
| 819 | return Item(PARENT_TYPE::DOT, W('.'), regex+1); |
| 820 | else if (*regex == W('^')) |
| 821 | return Item(PARENT_TYPE::CARET, W('^'), regex+1); |
| 822 | else if (*regex == W('$')) |
| 823 | return Item(PARENT_TYPE::DOLLAR, W('$'), regex+1); |
| 824 | else if (*regex == W('*')) |
| 825 | return Item(PARENT_TYPE::STAR, W('*'), regex+1); |
| 826 | else if (*regex == W('?')) |
| 827 | return Item(PARENT_TYPE::QUESTION_MARK, W('?'), regex+1); |
| 828 | else if (*regex == W('+')) |
| 829 | return Item(PARENT_TYPE::PLUS, W('+'), regex+1); |
| 830 | else if (*regex == W('(')) |
| 831 | return Item(PARENT_TYPE::PAREN_OPEN, W('('), regex+1); |
| 832 | else if (*regex == W(')')) |
| 833 | return Item(PARENT_TYPE::PAREN_CLOSE, W(')'), regex+1); |
| 834 | else |
| 835 | return Item(PARENT_TYPE::CHARACTER, *regex, regex + 1); |
| 836 | } |
| 837 | |
| 838 | //------------------------------------------------------------------------------------------------------- |
| 839 | // Returns true if the next character point to by input matches the character class described by c. |
| 840 | |
| 841 | template <typename ITERATOR_TYPE> |
| 842 | bool |
| 843 | WCHARItemTraits<ITERATOR_TYPE>::MatchItem( |
| 844 | const Item& c, |
| 845 | const InputIterator& input, |
| 846 | const InputIterator& inputEnd, |
| 847 | MatchFlags flags) |
| 848 | { |
| 849 | WRAPPER_NO_CONTRACT; |
| 850 | |
| 851 | if (c.GetType() == PARENT_TYPE::DIGIT) |
| 852 | return *input >= W('0') && *input <= W('9'); |
| 853 | else if (c.GetType() == PARENT_TYPE::ALPHA) |
| 854 | return (*input >= W('a') && *input <= W('z')) || (*input >= W('A') && *input <= W('Z')); |
| 855 | else if (c.GetType() == PARENT_TYPE::WHITESPACE) |
| 856 | return *input == W(' ') || *input == W('\t'); |
| 857 | else if (c.GetType() == PARENT_TYPE::NON_WHITESPACE) |
| 858 | return !(*input == W(' ') || *input == W('\t')); |
| 859 | else |
| 860 | return c.GetType() == PARENT_TYPE::DOT || MatchCharacter(c.GetValue(), *input, flags); |
| 861 | } |
| 862 | |
| 863 | //======================================================================================================= |
| 864 | // Implements ItemTraitsBase, provides matching for ASCII (*not* UTF8) characters. |
| 865 | |
| 866 | template <typename ITERATOR_TYPE> |
| 867 | class CHARItemTraits : public ItemTraitsBase<ITERATOR_TYPE, CHAR> |
| 868 | { |
| 869 | public: |
| 870 | typedef ItemTraitsBase<ITERATOR_TYPE, CHAR> PARENT_TYPE; |
| 871 | typedef typename PARENT_TYPE::RegexIterator RegexIterator; |
| 872 | typedef typename PARENT_TYPE::InputIterator InputIterator; |
| 873 | typedef typename PARENT_TYPE::Item Item; |
| 874 | typedef typename PARENT_TYPE::MatchFlags MatchFlags; |
| 875 | |
| 876 | static Item GetItem(const RegexIterator& regex, const RegexIterator& regexEnd, MatchFlags flags); |
| 877 | static bool MatchItem(const Item& c, const InputIterator& input, const InputIterator& inputEnd, MatchFlags flags); |
| 878 | |
| 879 | protected: |
| 880 | CHARItemTraits() |
| 881 | {} |
| 882 | |
| 883 | private: |
| 884 | static inline bool IS_UPPER_A_TO_Z(CHAR x) |
| 885 | { WRAPPER_NO_CONTRACT; return (((x) >= 'A') && ((x) <= 'Z')); } |
| 886 | |
| 887 | static inline bool IS_LOWER_A_TO_Z(CHAR x) |
| 888 | { WRAPPER_NO_CONTRACT; return (((x) >= 'a') && ((x) <= 'z')); } |
| 889 | |
| 890 | static inline CHAR UPCASE(CHAR x) |
| 891 | { WRAPPER_NO_CONTRACT; return (IS_LOWER_A_TO_Z(x) ? ((x) - 'a' + 'A') : (x)); } |
| 892 | |
| 893 | static inline CHAR DOWNCASE(CHAR x) |
| 894 | { WRAPPER_NO_CONTRACT; return (IS_UPPER_A_TO_Z(x) ? ((x) - 'A' + 'a') : (x)); } |
| 895 | |
| 896 | static bool MatchCharacter(CHAR c1, CHAR c2, MatchFlags flags) |
| 897 | { WRAPPER_NO_CONTRACT; return (flags & PARENT_TYPE::MF_CASE_INSENSITIVE) ? (DOWNCASE(c1) == DOWNCASE(c2)) : (c1 == c2); } |
| 898 | }; |
| 899 | |
| 900 | //------------------------------------------------------------------------------------------------------- |
| 901 | // Reads the next item from regex, recognizing special characters outlined in ItemTraitsBase. |
| 902 | |
| 903 | template <typename ITERATOR_TYPE> |
| 904 | typename CHARItemTraits<ITERATOR_TYPE>::Item |
| 905 | CHARItemTraits<ITERATOR_TYPE>::GetItem( |
| 906 | const RegexIterator& regex, |
| 907 | const RegexIterator& regexEnd, |
| 908 | MatchFlags flags) |
| 909 | { |
| 910 | WRAPPER_NO_CONTRACT; |
| 911 | |
| 912 | if (*regex == '\\') |
| 913 | { |
| 914 | const RegexIterator regexNext = regex+1; |
| 915 | if (regexNext == regexEnd) |
| 916 | return Item(PARENT_TYPE::CHARACTER, W('\\'), regexNext); |
| 917 | if (*regexNext == 'd') |
| 918 | return Item(PARENT_TYPE::DIGIT, regexNext+1); |
| 919 | if (*regexNext == 'w') |
| 920 | return Item(PARENT_TYPE::ALPHA, regexNext+1); |
| 921 | if (*regexNext == 's') |
| 922 | return Item(PARENT_TYPE::WHITESPACE, regexNext+1); |
| 923 | return Item(PARENT_TYPE::CHARACTER, *regexNext, regexNext+1); |
| 924 | } |
| 925 | else if (*regex == '.') |
| 926 | return Item(PARENT_TYPE::DOT, '.', regex+1); |
| 927 | else if (*regex == '^') |
| 928 | return Item(PARENT_TYPE::CARET, '^', regex+1); |
| 929 | else if (*regex == '$') |
| 930 | return Item(PARENT_TYPE::DOLLAR, '$', regex+1); |
| 931 | else if (*regex == '*') |
| 932 | return Item(PARENT_TYPE::STAR, '*', regex+1); |
| 933 | else if (*regex == '?') |
| 934 | return Item(PARENT_TYPE::QUESTION_MARK, '?', regex+1); |
| 935 | else if (*regex == '+') |
| 936 | return Item(PARENT_TYPE::PLUS, '+', regex+1); |
| 937 | else if (*regex == '(') |
| 938 | return Item(PARENT_TYPE::PAREN_OPEN, '(', regex+1); |
| 939 | else if (*regex == ')') |
| 940 | return Item(PARENT_TYPE::PAREN_CLOSE, ')', regex+1); |
| 941 | else |
| 942 | return Item(PARENT_TYPE::CHARACTER, *regex, regex + 1); |
| 943 | } |
| 944 | |
| 945 | //------------------------------------------------------------------------------------------------------- |
| 946 | // Returns true if the next character point to by input matches the character class described by c. |
| 947 | |
| 948 | template <typename ITERATOR_TYPE> |
| 949 | bool |
| 950 | CHARItemTraits<ITERATOR_TYPE>::MatchItem( |
| 951 | const Item& c, |
| 952 | const InputIterator& input, |
| 953 | const InputIterator& inputEnd, |
| 954 | MatchFlags flags) |
| 955 | { |
| 956 | WRAPPER_NO_CONTRACT; |
| 957 | |
| 958 | if (c.GetType() == PARENT_TYPE::DIGIT) |
| 959 | return *input >= W('0') && *input <= W('9'); |
| 960 | else if (c.GetType() == PARENT_TYPE::ALPHA) |
| 961 | return (*input >= W('a') && *input <= W('z')) || (*input >= W('A') && *input <= W('Z')); |
| 962 | else if (c.GetType() == PARENT_TYPE::WHITESPACE) |
| 963 | return *input == W(' ') || *input == W('\t'); |
| 964 | else |
| 965 | return c.GetType() == PARENT_TYPE::DOT || MatchCharacter(c.GetValue(), *input, flags); |
| 966 | } |
| 967 | |
| 968 | } /* namespace regex */ |
| 969 | } /* namespace clr */ |
| 970 | |
| 971 | #endif // _DEBUG |
| 972 | |
| 973 | #endif // _REGEX_BASE_H_ |
| 974 | |