1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3//
4// file: repattrn.cpp
5//
6/*
7***************************************************************************
8* Copyright (C) 2002-2016 International Business Machines Corporation
9* and others. All rights reserved.
10***************************************************************************
11*/
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_REGULAR_EXPRESSIONS
16
17#include "unicode/regex.h"
18#include "unicode/uclean.h"
19#include "cmemory.h"
20#include "cstr.h"
21#include "uassert.h"
22#include "uhash.h"
23#include "uvector.h"
24#include "uvectr32.h"
25#include "uvectr64.h"
26#include "regexcmp.h"
27#include "regeximp.h"
28#include "regexst.h"
29
30U_NAMESPACE_BEGIN
31
32//--------------------------------------------------------------------------
33//
34// RegexPattern Default Constructor
35//
36//--------------------------------------------------------------------------
37RegexPattern::RegexPattern() {
38 // Init all of this instances data.
39 init();
40}
41
42
43//--------------------------------------------------------------------------
44//
45// Copy Constructor Note: This is a rather inefficient implementation,
46// but it probably doesn't matter.
47//
48//--------------------------------------------------------------------------
49RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
50 init();
51 *this = other;
52}
53
54
55
56//--------------------------------------------------------------------------
57//
58// Assignment Operator
59//
60//--------------------------------------------------------------------------
61RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
62 if (this == &other) {
63 // Source and destination are the same. Don't do anything.
64 return *this;
65 }
66
67 // Clean out any previous contents of object being assigned to.
68 zap();
69
70 // Give target object a default initialization
71 init();
72
73 // Copy simple fields
74 fDeferredStatus = other.fDeferredStatus;
75
76 if (U_FAILURE(fDeferredStatus)) {
77 return *this;
78 }
79
80 if (other.fPatternString == NULL) {
81 fPatternString = NULL;
82 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
83 } else {
84 fPatternString = new UnicodeString(*(other.fPatternString));
85 if (fPatternString == NULL) {
86 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
87 } else {
88 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
89 }
90 }
91 if (U_FAILURE(fDeferredStatus)) {
92 return *this;
93 }
94
95 fFlags = other.fFlags;
96 fLiteralText = other.fLiteralText;
97 fMinMatchLen = other.fMinMatchLen;
98 fFrameSize = other.fFrameSize;
99 fDataSize = other.fDataSize;
100 fStaticSets = other.fStaticSets;
101 fStaticSets8 = other.fStaticSets8;
102
103 fStartType = other.fStartType;
104 fInitialStringIdx = other.fInitialStringIdx;
105 fInitialStringLen = other.fInitialStringLen;
106 *fInitialChars = *other.fInitialChars;
107 fInitialChar = other.fInitialChar;
108 *fInitialChars8 = *other.fInitialChars8;
109 fNeedsAltInput = other.fNeedsAltInput;
110
111 // Copy the pattern. It's just values, nothing deep to copy.
112 fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
113 fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
114
115 // Copy the Unicode Sets.
116 // Could be made more efficient if the sets were reference counted and shared,
117 // but I doubt that pattern copying will be particularly common.
118 // Note: init() already added an empty element zero to fSets
119 int32_t i;
120 int32_t numSets = other.fSets->size();
121 fSets8 = new Regex8BitSet[numSets];
122 if (fSets8 == NULL) {
123 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
124 return *this;
125 }
126 for (i=1; i<numSets; i++) {
127 if (U_FAILURE(fDeferredStatus)) {
128 return *this;
129 }
130 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
131 UnicodeSet *newSet = new UnicodeSet(*sourceSet);
132 if (newSet == NULL) {
133 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
134 break;
135 }
136 fSets->addElement(newSet, fDeferredStatus);
137 fSets8[i] = other.fSets8[i];
138 }
139
140 // Copy the named capture group hash map.
141 if (other.fNamedCaptureMap != nullptr && initNamedCaptureMap()) {
142 int32_t hashPos = UHASH_FIRST;
143 while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
144 if (U_FAILURE(fDeferredStatus)) {
145 break;
146 }
147 const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
148 UnicodeString *key = new UnicodeString(*name);
149 int32_t val = hashEl->value.integer;
150 if (key == NULL) {
151 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
152 } else {
153 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
154 }
155 }
156 }
157 return *this;
158}
159
160
161//--------------------------------------------------------------------------
162//
163// init Shared initialization for use by constructors.
164// Bring an uninitialized RegexPattern up to a default state.
165//
166//--------------------------------------------------------------------------
167void RegexPattern::init() {
168 fFlags = 0;
169 fCompiledPat = 0;
170 fLiteralText.remove();
171 fSets = NULL;
172 fSets8 = NULL;
173 fDeferredStatus = U_ZERO_ERROR;
174 fMinMatchLen = 0;
175 fFrameSize = 0;
176 fDataSize = 0;
177 fGroupMap = NULL;
178 fStaticSets = NULL;
179 fStaticSets8 = NULL;
180 fStartType = START_NO_INFO;
181 fInitialStringIdx = 0;
182 fInitialStringLen = 0;
183 fInitialChars = NULL;
184 fInitialChar = 0;
185 fInitialChars8 = NULL;
186 fNeedsAltInput = FALSE;
187 fNamedCaptureMap = NULL;
188
189 fPattern = NULL; // will be set later
190 fPatternString = NULL; // may be set later
191 fCompiledPat = new UVector64(fDeferredStatus);
192 fGroupMap = new UVector32(fDeferredStatus);
193 fSets = new UVector(fDeferredStatus);
194 fInitialChars = new UnicodeSet;
195 fInitialChars8 = new Regex8BitSet;
196 if (U_FAILURE(fDeferredStatus)) {
197 return;
198 }
199 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
200 fInitialChars == NULL || fInitialChars8 == NULL) {
201 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
202 return;
203 }
204
205 // Slot zero of the vector of sets is reserved. Fill it here.
206 fSets->addElement((int32_t)0, fDeferredStatus);
207}
208
209
210bool RegexPattern::initNamedCaptureMap() {
211 if (fNamedCaptureMap) {
212 return true;
213 }
214 fNamedCaptureMap = uhash_openSize(uhash_hashUnicodeString, // Key hash function
215 uhash_compareUnicodeString, // Key comparator function
216 uhash_compareLong, // Value comparator function
217 7, // Initial table capacity
218 &fDeferredStatus);
219 if (U_FAILURE(fDeferredStatus)) {
220 return false;
221 }
222
223 // fNamedCaptureMap owns its key strings, type (UnicodeString *)
224 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
225 return true;
226}
227
228//--------------------------------------------------------------------------
229//
230// zap Delete everything owned by this RegexPattern.
231//
232//--------------------------------------------------------------------------
233void RegexPattern::zap() {
234 delete fCompiledPat;
235 fCompiledPat = NULL;
236 int i;
237 for (i=1; i<fSets->size(); i++) {
238 UnicodeSet *s;
239 s = (UnicodeSet *)fSets->elementAt(i);
240 if (s != NULL) {
241 delete s;
242 }
243 }
244 delete fSets;
245 fSets = NULL;
246 delete[] fSets8;
247 fSets8 = NULL;
248 delete fGroupMap;
249 fGroupMap = NULL;
250 delete fInitialChars;
251 fInitialChars = NULL;
252 delete fInitialChars8;
253 fInitialChars8 = NULL;
254 if (fPattern != NULL) {
255 utext_close(fPattern);
256 fPattern = NULL;
257 }
258 if (fPatternString != NULL) {
259 delete fPatternString;
260 fPatternString = NULL;
261 }
262 if (fNamedCaptureMap != NULL) {
263 uhash_close(fNamedCaptureMap);
264 fNamedCaptureMap = NULL;
265 }
266}
267
268
269//--------------------------------------------------------------------------
270//
271// Destructor
272//
273//--------------------------------------------------------------------------
274RegexPattern::~RegexPattern() {
275 zap();
276}
277
278
279//--------------------------------------------------------------------------
280//
281// Clone
282//
283//--------------------------------------------------------------------------
284RegexPattern *RegexPattern::clone() const {
285 RegexPattern *copy = new RegexPattern(*this);
286 return copy;
287}
288
289
290//--------------------------------------------------------------------------
291//
292// operator == (comparison) Consider to patterns to be == if the
293// pattern strings and the flags are the same.
294// Note that pattern strings with the same
295// characters can still be considered different.
296//
297//--------------------------------------------------------------------------
298UBool RegexPattern::operator ==(const RegexPattern &other) const {
299 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
300 if (this->fPatternString != NULL && other.fPatternString != NULL) {
301 return *(this->fPatternString) == *(other.fPatternString);
302 } else if (this->fPattern == NULL) {
303 if (other.fPattern == NULL) {
304 return TRUE;
305 }
306 } else if (other.fPattern != NULL) {
307 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
308 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
309 return utext_equals(this->fPattern, other.fPattern);
310 }
311 }
312 return FALSE;
313}
314
315//---------------------------------------------------------------------
316//
317// compile
318//
319//---------------------------------------------------------------------
320RegexPattern * U_EXPORT2
321RegexPattern::compile(const UnicodeString &regex,
322 uint32_t flags,
323 UParseError &pe,
324 UErrorCode &status)
325{
326 if (U_FAILURE(status)) {
327 return NULL;
328 }
329
330 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
331 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
332 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
333
334 if ((flags & ~allFlags) != 0) {
335 status = U_REGEX_INVALID_FLAG;
336 return NULL;
337 }
338
339 if ((flags & UREGEX_CANON_EQ) != 0) {
340 status = U_REGEX_UNIMPLEMENTED;
341 return NULL;
342 }
343
344 RegexPattern *This = new RegexPattern;
345 if (This == NULL) {
346 status = U_MEMORY_ALLOCATION_ERROR;
347 return NULL;
348 }
349 if (U_FAILURE(This->fDeferredStatus)) {
350 status = This->fDeferredStatus;
351 delete This;
352 return NULL;
353 }
354 This->fFlags = flags;
355
356 RegexCompile compiler(This, status);
357 compiler.compile(regex, pe, status);
358
359 if (U_FAILURE(status)) {
360 delete This;
361 This = NULL;
362 }
363
364 return This;
365}
366
367
368//
369// compile, UText mode
370//
371RegexPattern * U_EXPORT2
372RegexPattern::compile(UText *regex,
373 uint32_t flags,
374 UParseError &pe,
375 UErrorCode &status)
376{
377 if (U_FAILURE(status)) {
378 return NULL;
379 }
380
381 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
382 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
383 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
384
385 if ((flags & ~allFlags) != 0) {
386 status = U_REGEX_INVALID_FLAG;
387 return NULL;
388 }
389
390 if ((flags & UREGEX_CANON_EQ) != 0) {
391 status = U_REGEX_UNIMPLEMENTED;
392 return NULL;
393 }
394
395 RegexPattern *This = new RegexPattern;
396 if (This == NULL) {
397 status = U_MEMORY_ALLOCATION_ERROR;
398 return NULL;
399 }
400 if (U_FAILURE(This->fDeferredStatus)) {
401 status = This->fDeferredStatus;
402 delete This;
403 return NULL;
404 }
405 This->fFlags = flags;
406
407 RegexCompile compiler(This, status);
408 compiler.compile(regex, pe, status);
409
410 if (U_FAILURE(status)) {
411 delete This;
412 This = NULL;
413 }
414
415 return This;
416}
417
418//
419// compile with default flags.
420//
421RegexPattern * U_EXPORT2
422RegexPattern::compile(const UnicodeString &regex,
423 UParseError &pe,
424 UErrorCode &err)
425{
426 return compile(regex, 0, pe, err);
427}
428
429
430//
431// compile with default flags, UText mode
432//
433RegexPattern * U_EXPORT2
434RegexPattern::compile(UText *regex,
435 UParseError &pe,
436 UErrorCode &err)
437{
438 return compile(regex, 0, pe, err);
439}
440
441
442//
443// compile with no UParseErr parameter.
444//
445RegexPattern * U_EXPORT2
446RegexPattern::compile(const UnicodeString &regex,
447 uint32_t flags,
448 UErrorCode &err)
449{
450 UParseError pe;
451 return compile(regex, flags, pe, err);
452}
453
454
455//
456// compile with no UParseErr parameter, UText mode
457//
458RegexPattern * U_EXPORT2
459RegexPattern::compile(UText *regex,
460 uint32_t flags,
461 UErrorCode &err)
462{
463 UParseError pe;
464 return compile(regex, flags, pe, err);
465}
466
467
468//---------------------------------------------------------------------
469//
470// flags
471//
472//---------------------------------------------------------------------
473uint32_t RegexPattern::flags() const {
474 return fFlags;
475}
476
477
478//---------------------------------------------------------------------
479//
480// matcher(UnicodeString, err)
481//
482//---------------------------------------------------------------------
483RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
484 UErrorCode &status) const {
485 RegexMatcher *retMatcher = matcher(status);
486 if (retMatcher != NULL) {
487 retMatcher->fDeferredStatus = status;
488 retMatcher->reset(input);
489 }
490 return retMatcher;
491}
492
493
494//---------------------------------------------------------------------
495//
496// matcher(status)
497//
498//---------------------------------------------------------------------
499RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
500 RegexMatcher *retMatcher = NULL;
501
502 if (U_FAILURE(status)) {
503 return NULL;
504 }
505 if (U_FAILURE(fDeferredStatus)) {
506 status = fDeferredStatus;
507 return NULL;
508 }
509
510 retMatcher = new RegexMatcher(this);
511 if (retMatcher == NULL) {
512 status = U_MEMORY_ALLOCATION_ERROR;
513 return NULL;
514 }
515 return retMatcher;
516}
517
518
519
520//---------------------------------------------------------------------
521//
522// matches Convenience function to test for a match, starting
523// with a pattern string and a data string.
524//
525//---------------------------------------------------------------------
526UBool U_EXPORT2 RegexPattern::matches(const UnicodeString &regex,
527 const UnicodeString &input,
528 UParseError &pe,
529 UErrorCode &status) {
530
531 if (U_FAILURE(status)) {return FALSE;}
532
533 UBool retVal;
534 RegexPattern *pat = NULL;
535 RegexMatcher *matcher = NULL;
536
537 pat = RegexPattern::compile(regex, 0, pe, status);
538 matcher = pat->matcher(input, status);
539 retVal = matcher->matches(status);
540
541 delete matcher;
542 delete pat;
543 return retVal;
544}
545
546
547//
548// matches, UText mode
549//
550UBool U_EXPORT2 RegexPattern::matches(UText *regex,
551 UText *input,
552 UParseError &pe,
553 UErrorCode &status) {
554
555 if (U_FAILURE(status)) {return FALSE;}
556
557 UBool retVal = FALSE;
558 RegexPattern *pat = NULL;
559 RegexMatcher *matcher = NULL;
560
561 pat = RegexPattern::compile(regex, 0, pe, status);
562 matcher = pat->matcher(status);
563 if (U_SUCCESS(status)) {
564 matcher->reset(input);
565 retVal = matcher->matches(status);
566 }
567
568 delete matcher;
569 delete pat;
570 return retVal;
571}
572
573
574
575
576
577//---------------------------------------------------------------------
578//
579// pattern
580//
581//---------------------------------------------------------------------
582UnicodeString RegexPattern::pattern() const {
583 if (fPatternString != NULL) {
584 return *fPatternString;
585 } else if (fPattern == NULL) {
586 return UnicodeString();
587 } else {
588 UErrorCode status = U_ZERO_ERROR;
589 int64_t nativeLen = utext_nativeLength(fPattern);
590 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
591 UnicodeString result;
592
593 status = U_ZERO_ERROR;
594 UChar *resultChars = result.getBuffer(len16);
595 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
596 result.releaseBuffer(len16);
597
598 return result;
599 }
600}
601
602
603
604
605//---------------------------------------------------------------------
606//
607// patternText
608//
609//---------------------------------------------------------------------
610UText *RegexPattern::patternText(UErrorCode &status) const {
611 if (U_FAILURE(status)) {return NULL;}
612 status = U_ZERO_ERROR;
613
614 if (fPattern != NULL) {
615 return fPattern;
616 } else {
617 RegexStaticSets::initGlobals(&status);
618 return RegexStaticSets::gStaticSets->fEmptyText;
619 }
620}
621
622
623//--------------------------------------------------------------------------------
624//
625// groupNumberFromName()
626//
627//--------------------------------------------------------------------------------
628int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
629 if (U_FAILURE(status)) {
630 return 0;
631 }
632
633 // No need to explicitly check for syntactically valid names.
634 // Invalid ones will never be in the map, and the lookup will fail.
635
636 int32_t number = fNamedCaptureMap ? uhash_geti(fNamedCaptureMap, &groupName) : 0;
637 if (number == 0) {
638 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
639 }
640 return number;
641}
642
643int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
644 if (U_FAILURE(status)) {
645 return 0;
646 }
647 UnicodeString name(groupName, nameLength, US_INV);
648 return groupNumberFromName(name, status);
649}
650
651
652//---------------------------------------------------------------------
653//
654// split
655//
656//---------------------------------------------------------------------
657int32_t RegexPattern::split(const UnicodeString &input,
658 UnicodeString dest[],
659 int32_t destCapacity,
660 UErrorCode &status) const
661{
662 if (U_FAILURE(status)) {
663 return 0;
664 }
665
666 RegexMatcher m(this);
667 int32_t r = 0;
668 // Check m's status to make sure all is ok.
669 if (U_SUCCESS(m.fDeferredStatus)) {
670 r = m.split(input, dest, destCapacity, status);
671 }
672 return r;
673}
674
675//
676// split, UText mode
677//
678int32_t RegexPattern::split(UText *input,
679 UText *dest[],
680 int32_t destCapacity,
681 UErrorCode &status) const
682{
683 if (U_FAILURE(status)) {
684 return 0;
685 }
686
687 RegexMatcher m(this);
688 int32_t r = 0;
689 // Check m's status to make sure all is ok.
690 if (U_SUCCESS(m.fDeferredStatus)) {
691 r = m.split(input, dest, destCapacity, status);
692 }
693 return r;
694}
695
696
697//---------------------------------------------------------------------
698//
699// dump Output the compiled form of the pattern.
700// Debugging function only.
701//
702//---------------------------------------------------------------------
703void RegexPattern::dumpOp(int32_t index) const {
704 (void)index; // Suppress warnings in non-debug build.
705#if defined(REGEX_DEBUG)
706 static const char * const opNames[] = {URX_OPCODE_NAMES};
707 int32_t op = fCompiledPat->elementAti(index);
708 int32_t val = URX_VAL(op);
709 int32_t type = URX_TYPE(op);
710 int32_t pinnedType = type;
711 if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
712 pinnedType = 0;
713 }
714
715 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
716 switch (type) {
717 case URX_NOP:
718 case URX_DOTANY:
719 case URX_DOTANY_ALL:
720 case URX_FAIL:
721 case URX_CARET:
722 case URX_DOLLAR:
723 case URX_BACKSLASH_G:
724 case URX_BACKSLASH_X:
725 case URX_END:
726 case URX_DOLLAR_M:
727 case URX_CARET_M:
728 // Types with no operand field of interest.
729 break;
730
731 case URX_RESERVED_OP:
732 case URX_START_CAPTURE:
733 case URX_END_CAPTURE:
734 case URX_STATE_SAVE:
735 case URX_JMP:
736 case URX_JMP_SAV:
737 case URX_JMP_SAV_X:
738 case URX_BACKSLASH_B:
739 case URX_BACKSLASH_BU:
740 case URX_BACKSLASH_D:
741 case URX_BACKSLASH_Z:
742 case URX_STRING_LEN:
743 case URX_CTR_INIT:
744 case URX_CTR_INIT_NG:
745 case URX_CTR_LOOP:
746 case URX_CTR_LOOP_NG:
747 case URX_RELOC_OPRND:
748 case URX_STO_SP:
749 case URX_LD_SP:
750 case URX_BACKREF:
751 case URX_STO_INP_LOC:
752 case URX_JMPX:
753 case URX_LA_START:
754 case URX_LA_END:
755 case URX_BACKREF_I:
756 case URX_LB_START:
757 case URX_LB_CONT:
758 case URX_LB_END:
759 case URX_LBN_CONT:
760 case URX_LBN_END:
761 case URX_LOOP_C:
762 case URX_LOOP_DOT_I:
763 case URX_BACKSLASH_H:
764 case URX_BACKSLASH_R:
765 case URX_BACKSLASH_V:
766 // types with an integer operand field.
767 printf("%d", val);
768 break;
769
770 case URX_ONECHAR:
771 case URX_ONECHAR_I:
772 if (val < 0x20) {
773 printf("%#x", val);
774 } else {
775 printf("'%s'", CStr(UnicodeString(val))());
776 }
777 break;
778
779 case URX_STRING:
780 case URX_STRING_I:
781 {
782 int32_t lengthOp = fCompiledPat->elementAti(index+1);
783 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
784 int32_t length = URX_VAL(lengthOp);
785 UnicodeString str(fLiteralText, val, length);
786 printf("%s", CStr(str)());
787 }
788 break;
789
790 case URX_SETREF:
791 case URX_LOOP_SR_I:
792 {
793 UnicodeString s;
794 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
795 set->toPattern(s, TRUE);
796 printf("%s", CStr(s)());
797 }
798 break;
799
800 case URX_STATIC_SETREF:
801 case URX_STAT_SETREF_N:
802 {
803 UnicodeString s;
804 if (val & URX_NEG_SET) {
805 printf("NOT ");
806 val &= ~URX_NEG_SET;
807 }
808 UnicodeSet *set = fStaticSets[val];
809 set->toPattern(s, TRUE);
810 printf("%s", CStr(s)());
811 }
812 break;
813
814
815 default:
816 printf("??????");
817 break;
818 }
819 printf("\n");
820#endif
821}
822
823
824void RegexPattern::dumpPattern() const {
825#if defined(REGEX_DEBUG)
826 int index;
827
828 UnicodeString patStr;
829 for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
830 patStr.append(c);
831 }
832 printf("Original Pattern: \"%s\"\n", CStr(patStr)());
833 printf(" Min Match Length: %d\n", fMinMatchLen);
834 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
835 if (fStartType == START_STRING) {
836 UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
837 printf(" Initial match string: \"%s\"\n", CStr(initialString)());
838 } else if (fStartType == START_SET) {
839 UnicodeString s;
840 fInitialChars->toPattern(s, TRUE);
841 printf(" Match First Chars: %s\n", CStr(s)());
842
843 } else if (fStartType == START_CHAR) {
844 printf(" First char of Match: ");
845 if (fInitialChar > 0x20) {
846 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
847 } else {
848 printf("%#x\n", fInitialChar);
849 }
850 }
851
852 printf("Named Capture Groups:\n");
853 if (!fNamedCaptureMap || uhash_count(fNamedCaptureMap) == 0) {
854 printf(" None\n");
855 } else {
856 int32_t pos = UHASH_FIRST;
857 const UHashElement *el = NULL;
858 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
859 const UnicodeString *name = (const UnicodeString *)el->key.pointer;
860 int32_t number = el->value.integer;
861 printf(" %d\t%s\n", number, CStr(*name)());
862 }
863 }
864
865 printf("\nIndex Binary Type Operand\n" \
866 "-------------------------------------------\n");
867 for (index = 0; index<fCompiledPat->size(); index++) {
868 dumpOp(index);
869 }
870 printf("\n\n");
871#endif
872}
873
874
875
// Expands the RTTI implementation macro for RegexPattern.
// NOTE(review): presumably this supplies the class-ID accessors declared in
// the class header; the macro is defined outside this file, so confirm there.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
877
878U_NAMESPACE_END
879#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
880