1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3//
4// file: repattrn.cpp
5//
6/*
7***************************************************************************
8* Copyright (C) 2002-2016 International Business Machines Corporation
9* and others. All rights reserved.
10***************************************************************************
11*/
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_REGULAR_EXPRESSIONS
16
17#include "unicode/regex.h"
18#include "unicode/uclean.h"
19#include "cmemory.h"
20#include "cstr.h"
21#include "uassert.h"
22#include "uhash.h"
23#include "uvector.h"
24#include "uvectr32.h"
25#include "uvectr64.h"
26#include "regexcmp.h"
27#include "regeximp.h"
28#include "regexst.h"
29
30U_NAMESPACE_BEGIN
31
32//--------------------------------------------------------------------------
33//
34// RegexPattern Default Constructor
35//
36//--------------------------------------------------------------------------
37RegexPattern::RegexPattern() {
38 // Init all of this instances data.
39 init();
40}
41
42
43//--------------------------------------------------------------------------
44//
45// Copy Constructor Note: This is a rather inefficient implementation,
46// but it probably doesn't matter.
47//
48//--------------------------------------------------------------------------
49RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
50 init();
51 *this = other;
52}
53
54
55
56//--------------------------------------------------------------------------
57//
58// Assignment Operator
59//
60//--------------------------------------------------------------------------
61RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
62 if (this == &other) {
63 // Source and destination are the same. Don't do anything.
64 return *this;
65 }
66
67 // Clean out any previous contents of object being assigned to.
68 zap();
69
70 // Give target object a default initialization
71 init();
72
73 // Copy simple fields
74 fDeferredStatus = other.fDeferredStatus;
75
76 if (U_FAILURE(fDeferredStatus)) {
77 return *this;
78 }
79
80 if (other.fPatternString == NULL) {
81 fPatternString = NULL;
82 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
83 } else {
84 fPatternString = new UnicodeString(*(other.fPatternString));
85 if (fPatternString == NULL) {
86 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
87 } else {
88 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
89 }
90 }
91 if (U_FAILURE(fDeferredStatus)) {
92 return *this;
93 }
94
95 fFlags = other.fFlags;
96 fLiteralText = other.fLiteralText;
97 fMinMatchLen = other.fMinMatchLen;
98 fFrameSize = other.fFrameSize;
99 fDataSize = other.fDataSize;
100
101 fStartType = other.fStartType;
102 fInitialStringIdx = other.fInitialStringIdx;
103 fInitialStringLen = other.fInitialStringLen;
104 *fInitialChars = *other.fInitialChars;
105 fInitialChar = other.fInitialChar;
106 *fInitialChars8 = *other.fInitialChars8;
107 fNeedsAltInput = other.fNeedsAltInput;
108
109 // Copy the pattern. It's just values, nothing deep to copy.
110 fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
111 fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
112
113 // Copy the Unicode Sets.
114 // Could be made more efficient if the sets were reference counted and shared,
115 // but I doubt that pattern copying will be particularly common.
116 // Note: init() already added an empty element zero to fSets
117 int32_t i;
118 int32_t numSets = other.fSets->size();
119 fSets8 = new Regex8BitSet[numSets];
120 if (fSets8 == NULL) {
121 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
122 return *this;
123 }
124 for (i=1; i<numSets; i++) {
125 if (U_FAILURE(fDeferredStatus)) {
126 return *this;
127 }
128 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
129 UnicodeSet *newSet = new UnicodeSet(*sourceSet);
130 if (newSet == NULL) {
131 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
132 break;
133 }
134 fSets->addElement(newSet, fDeferredStatus);
135 fSets8[i] = other.fSets8[i];
136 }
137
138 // Copy the named capture group hash map.
139 if (other.fNamedCaptureMap != nullptr && initNamedCaptureMap()) {
140 int32_t hashPos = UHASH_FIRST;
141 while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
142 if (U_FAILURE(fDeferredStatus)) {
143 break;
144 }
145 const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
146 UnicodeString *key = new UnicodeString(*name);
147 int32_t val = hashEl->value.integer;
148 if (key == NULL) {
149 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
150 } else {
151 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
152 }
153 }
154 }
155 return *this;
156}
157
158
159//--------------------------------------------------------------------------
160//
161// init Shared initialization for use by constructors.
162// Bring an uninitialized RegexPattern up to a default state.
163//
164//--------------------------------------------------------------------------
165void RegexPattern::init() {
166 fFlags = 0;
167 fCompiledPat = 0;
168 fLiteralText.remove();
169 fSets = NULL;
170 fSets8 = NULL;
171 fDeferredStatus = U_ZERO_ERROR;
172 fMinMatchLen = 0;
173 fFrameSize = 0;
174 fDataSize = 0;
175 fGroupMap = NULL;
176 fStartType = START_NO_INFO;
177 fInitialStringIdx = 0;
178 fInitialStringLen = 0;
179 fInitialChars = NULL;
180 fInitialChar = 0;
181 fInitialChars8 = NULL;
182 fNeedsAltInput = FALSE;
183 fNamedCaptureMap = NULL;
184
185 fPattern = NULL; // will be set later
186 fPatternString = NULL; // may be set later
187 fCompiledPat = new UVector64(fDeferredStatus);
188 fGroupMap = new UVector32(fDeferredStatus);
189 fSets = new UVector(fDeferredStatus);
190 fInitialChars = new UnicodeSet;
191 fInitialChars8 = new Regex8BitSet;
192 if (U_FAILURE(fDeferredStatus)) {
193 return;
194 }
195 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
196 fInitialChars == NULL || fInitialChars8 == NULL) {
197 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
198 return;
199 }
200
201 // Slot zero of the vector of sets is reserved. Fill it here.
202 fSets->addElement((int32_t)0, fDeferredStatus);
203}
204
205
206bool RegexPattern::initNamedCaptureMap() {
207 if (fNamedCaptureMap) {
208 return true;
209 }
210 fNamedCaptureMap = uhash_openSize(uhash_hashUnicodeString, // Key hash function
211 uhash_compareUnicodeString, // Key comparator function
212 uhash_compareLong, // Value comparator function
213 7, // Initial table capacity
214 &fDeferredStatus);
215 if (U_FAILURE(fDeferredStatus)) {
216 return false;
217 }
218
219 // fNamedCaptureMap owns its key strings, type (UnicodeString *)
220 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
221 return true;
222}
223
224//--------------------------------------------------------------------------
225//
226// zap Delete everything owned by this RegexPattern.
227//
228//--------------------------------------------------------------------------
229void RegexPattern::zap() {
230 delete fCompiledPat;
231 fCompiledPat = NULL;
232 int i;
233 for (i=1; i<fSets->size(); i++) {
234 UnicodeSet *s;
235 s = (UnicodeSet *)fSets->elementAt(i);
236 if (s != NULL) {
237 delete s;
238 }
239 }
240 delete fSets;
241 fSets = NULL;
242 delete[] fSets8;
243 fSets8 = NULL;
244 delete fGroupMap;
245 fGroupMap = NULL;
246 delete fInitialChars;
247 fInitialChars = NULL;
248 delete fInitialChars8;
249 fInitialChars8 = NULL;
250 if (fPattern != NULL) {
251 utext_close(fPattern);
252 fPattern = NULL;
253 }
254 if (fPatternString != NULL) {
255 delete fPatternString;
256 fPatternString = NULL;
257 }
258 if (fNamedCaptureMap != NULL) {
259 uhash_close(fNamedCaptureMap);
260 fNamedCaptureMap = NULL;
261 }
262}
263
264
265//--------------------------------------------------------------------------
266//
267// Destructor
268//
269//--------------------------------------------------------------------------
270RegexPattern::~RegexPattern() {
271 zap();
272}
273
274
275//--------------------------------------------------------------------------
276//
277// Clone
278//
279//--------------------------------------------------------------------------
280RegexPattern *RegexPattern::clone() const {
281 RegexPattern *copy = new RegexPattern(*this);
282 return copy;
283}
284
285
//--------------------------------------------------------------------------
//
//    operator == (comparison)    Consider two patterns to be == if the
//                                pattern strings and the flags are the same.
//                                Note that pattern strings with the same
//                                characters can still be considered different.
//
//--------------------------------------------------------------------------
294UBool RegexPattern::operator ==(const RegexPattern &other) const {
295 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
296 if (this->fPatternString != NULL && other.fPatternString != NULL) {
297 return *(this->fPatternString) == *(other.fPatternString);
298 } else if (this->fPattern == NULL) {
299 if (other.fPattern == NULL) {
300 return TRUE;
301 }
302 } else if (other.fPattern != NULL) {
303 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
304 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
305 return utext_equals(this->fPattern, other.fPattern);
306 }
307 }
308 return FALSE;
309}
310
311//---------------------------------------------------------------------
312//
313// compile
314//
315//---------------------------------------------------------------------
316RegexPattern * U_EXPORT2
317RegexPattern::compile(const UnicodeString &regex,
318 uint32_t flags,
319 UParseError &pe,
320 UErrorCode &status)
321{
322 if (U_FAILURE(status)) {
323 return NULL;
324 }
325
326 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
327 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
328 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
329
330 if ((flags & ~allFlags) != 0) {
331 status = U_REGEX_INVALID_FLAG;
332 return NULL;
333 }
334
335 if ((flags & UREGEX_CANON_EQ) != 0) {
336 status = U_REGEX_UNIMPLEMENTED;
337 return NULL;
338 }
339
340 RegexPattern *This = new RegexPattern;
341 if (This == NULL) {
342 status = U_MEMORY_ALLOCATION_ERROR;
343 return NULL;
344 }
345 if (U_FAILURE(This->fDeferredStatus)) {
346 status = This->fDeferredStatus;
347 delete This;
348 return NULL;
349 }
350 This->fFlags = flags;
351
352 RegexCompile compiler(This, status);
353 compiler.compile(regex, pe, status);
354
355 if (U_FAILURE(status)) {
356 delete This;
357 This = NULL;
358 }
359
360 return This;
361}
362
363
364//
365// compile, UText mode
366//
367RegexPattern * U_EXPORT2
368RegexPattern::compile(UText *regex,
369 uint32_t flags,
370 UParseError &pe,
371 UErrorCode &status)
372{
373 if (U_FAILURE(status)) {
374 return NULL;
375 }
376
377 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
378 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
379 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
380
381 if ((flags & ~allFlags) != 0) {
382 status = U_REGEX_INVALID_FLAG;
383 return NULL;
384 }
385
386 if ((flags & UREGEX_CANON_EQ) != 0) {
387 status = U_REGEX_UNIMPLEMENTED;
388 return NULL;
389 }
390
391 RegexPattern *This = new RegexPattern;
392 if (This == NULL) {
393 status = U_MEMORY_ALLOCATION_ERROR;
394 return NULL;
395 }
396 if (U_FAILURE(This->fDeferredStatus)) {
397 status = This->fDeferredStatus;
398 delete This;
399 return NULL;
400 }
401 This->fFlags = flags;
402
403 RegexCompile compiler(This, status);
404 compiler.compile(regex, pe, status);
405
406 if (U_FAILURE(status)) {
407 delete This;
408 This = NULL;
409 }
410
411 return This;
412}
413
414//
415// compile with default flags.
416//
417RegexPattern * U_EXPORT2
418RegexPattern::compile(const UnicodeString &regex,
419 UParseError &pe,
420 UErrorCode &err)
421{
422 return compile(regex, 0, pe, err);
423}
424
425
426//
427// compile with default flags, UText mode
428//
429RegexPattern * U_EXPORT2
430RegexPattern::compile(UText *regex,
431 UParseError &pe,
432 UErrorCode &err)
433{
434 return compile(regex, 0, pe, err);
435}
436
437
438//
439// compile with no UParseErr parameter.
440//
441RegexPattern * U_EXPORT2
442RegexPattern::compile(const UnicodeString &regex,
443 uint32_t flags,
444 UErrorCode &err)
445{
446 UParseError pe;
447 return compile(regex, flags, pe, err);
448}
449
450
451//
452// compile with no UParseErr parameter, UText mode
453//
454RegexPattern * U_EXPORT2
455RegexPattern::compile(UText *regex,
456 uint32_t flags,
457 UErrorCode &err)
458{
459 UParseError pe;
460 return compile(regex, flags, pe, err);
461}
462
463
464//---------------------------------------------------------------------
465//
466// flags
467//
468//---------------------------------------------------------------------
469uint32_t RegexPattern::flags() const {
470 return fFlags;
471}
472
473
474//---------------------------------------------------------------------
475//
476// matcher(UnicodeString, err)
477//
478//---------------------------------------------------------------------
479RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
480 UErrorCode &status) const {
481 RegexMatcher *retMatcher = matcher(status);
482 if (retMatcher != NULL) {
483 retMatcher->fDeferredStatus = status;
484 retMatcher->reset(input);
485 }
486 return retMatcher;
487}
488
489
490//---------------------------------------------------------------------
491//
492// matcher(status)
493//
494//---------------------------------------------------------------------
495RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
496 RegexMatcher *retMatcher = NULL;
497
498 if (U_FAILURE(status)) {
499 return NULL;
500 }
501 if (U_FAILURE(fDeferredStatus)) {
502 status = fDeferredStatus;
503 return NULL;
504 }
505
506 retMatcher = new RegexMatcher(this);
507 if (retMatcher == NULL) {
508 status = U_MEMORY_ALLOCATION_ERROR;
509 return NULL;
510 }
511 return retMatcher;
512}
513
514
515
516//---------------------------------------------------------------------
517//
518// matches Convenience function to test for a match, starting
519// with a pattern string and a data string.
520//
521//---------------------------------------------------------------------
522UBool U_EXPORT2 RegexPattern::matches(const UnicodeString &regex,
523 const UnicodeString &input,
524 UParseError &pe,
525 UErrorCode &status) {
526
527 if (U_FAILURE(status)) {return FALSE;}
528
529 UBool retVal;
530 RegexPattern *pat = NULL;
531 RegexMatcher *matcher = NULL;
532
533 pat = RegexPattern::compile(regex, 0, pe, status);
534 matcher = pat->matcher(input, status);
535 retVal = matcher->matches(status);
536
537 delete matcher;
538 delete pat;
539 return retVal;
540}
541
542
543//
544// matches, UText mode
545//
546UBool U_EXPORT2 RegexPattern::matches(UText *regex,
547 UText *input,
548 UParseError &pe,
549 UErrorCode &status) {
550
551 if (U_FAILURE(status)) {return FALSE;}
552
553 UBool retVal = FALSE;
554 RegexPattern *pat = NULL;
555 RegexMatcher *matcher = NULL;
556
557 pat = RegexPattern::compile(regex, 0, pe, status);
558 matcher = pat->matcher(status);
559 if (U_SUCCESS(status)) {
560 matcher->reset(input);
561 retVal = matcher->matches(status);
562 }
563
564 delete matcher;
565 delete pat;
566 return retVal;
567}
568
569
570
571
572
573//---------------------------------------------------------------------
574//
575// pattern
576//
577//---------------------------------------------------------------------
578UnicodeString RegexPattern::pattern() const {
579 if (fPatternString != NULL) {
580 return *fPatternString;
581 } else if (fPattern == NULL) {
582 return UnicodeString();
583 } else {
584 UErrorCode status = U_ZERO_ERROR;
585 int64_t nativeLen = utext_nativeLength(fPattern);
586 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
587 UnicodeString result;
588
589 status = U_ZERO_ERROR;
590 UChar *resultChars = result.getBuffer(len16);
591 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
592 result.releaseBuffer(len16);
593
594 return result;
595 }
596}
597
598
599
600
601//---------------------------------------------------------------------
602//
603// patternText
604//
605//---------------------------------------------------------------------
606UText *RegexPattern::patternText(UErrorCode &status) const {
607 if (U_FAILURE(status)) {return NULL;}
608 status = U_ZERO_ERROR;
609
610 if (fPattern != NULL) {
611 return fPattern;
612 } else {
613 RegexStaticSets::initGlobals(&status);
614 return RegexStaticSets::gStaticSets->fEmptyText;
615 }
616}
617
618
619//--------------------------------------------------------------------------------
620//
621// groupNumberFromName()
622//
623//--------------------------------------------------------------------------------
624int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
625 if (U_FAILURE(status)) {
626 return 0;
627 }
628
629 // No need to explicitly check for syntactically valid names.
630 // Invalid ones will never be in the map, and the lookup will fail.
631
632 int32_t number = fNamedCaptureMap ? uhash_geti(fNamedCaptureMap, &groupName) : 0;
633 if (number == 0) {
634 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
635 }
636 return number;
637}
638
639int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
640 if (U_FAILURE(status)) {
641 return 0;
642 }
643 UnicodeString name(groupName, nameLength, US_INV);
644 return groupNumberFromName(name, status);
645}
646
647
648//---------------------------------------------------------------------
649//
650// split
651//
652//---------------------------------------------------------------------
653int32_t RegexPattern::split(const UnicodeString &input,
654 UnicodeString dest[],
655 int32_t destCapacity,
656 UErrorCode &status) const
657{
658 if (U_FAILURE(status)) {
659 return 0;
660 }
661
662 RegexMatcher m(this);
663 int32_t r = 0;
664 // Check m's status to make sure all is ok.
665 if (U_SUCCESS(m.fDeferredStatus)) {
666 r = m.split(input, dest, destCapacity, status);
667 }
668 return r;
669}
670
671//
672// split, UText mode
673//
674int32_t RegexPattern::split(UText *input,
675 UText *dest[],
676 int32_t destCapacity,
677 UErrorCode &status) const
678{
679 if (U_FAILURE(status)) {
680 return 0;
681 }
682
683 RegexMatcher m(this);
684 int32_t r = 0;
685 // Check m's status to make sure all is ok.
686 if (U_SUCCESS(m.fDeferredStatus)) {
687 r = m.split(input, dest, destCapacity, status);
688 }
689 return r;
690}
691
692
//---------------------------------------------------------------------
//
//   dumpOp    Print one operation of the compiled pattern.
//             Debugging function only.
//
//---------------------------------------------------------------------
699void RegexPattern::dumpOp(int32_t index) const {
700 (void)index; // Suppress warnings in non-debug build.
701#if defined(REGEX_DEBUG)
702 static const char * const opNames[] = {URX_OPCODE_NAMES};
703 int32_t op = fCompiledPat->elementAti(index);
704 int32_t val = URX_VAL(op);
705 int32_t type = URX_TYPE(op);
706 int32_t pinnedType = type;
707 if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
708 pinnedType = 0;
709 }
710
711 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
712 switch (type) {
713 case URX_NOP:
714 case URX_DOTANY:
715 case URX_DOTANY_ALL:
716 case URX_FAIL:
717 case URX_CARET:
718 case URX_DOLLAR:
719 case URX_BACKSLASH_G:
720 case URX_BACKSLASH_X:
721 case URX_END:
722 case URX_DOLLAR_M:
723 case URX_CARET_M:
724 // Types with no operand field of interest.
725 break;
726
727 case URX_RESERVED_OP:
728 case URX_START_CAPTURE:
729 case URX_END_CAPTURE:
730 case URX_STATE_SAVE:
731 case URX_JMP:
732 case URX_JMP_SAV:
733 case URX_JMP_SAV_X:
734 case URX_BACKSLASH_B:
735 case URX_BACKSLASH_BU:
736 case URX_BACKSLASH_D:
737 case URX_BACKSLASH_Z:
738 case URX_STRING_LEN:
739 case URX_CTR_INIT:
740 case URX_CTR_INIT_NG:
741 case URX_CTR_LOOP:
742 case URX_CTR_LOOP_NG:
743 case URX_RELOC_OPRND:
744 case URX_STO_SP:
745 case URX_LD_SP:
746 case URX_BACKREF:
747 case URX_STO_INP_LOC:
748 case URX_JMPX:
749 case URX_LA_START:
750 case URX_LA_END:
751 case URX_BACKREF_I:
752 case URX_LB_START:
753 case URX_LB_CONT:
754 case URX_LB_END:
755 case URX_LBN_CONT:
756 case URX_LBN_END:
757 case URX_LOOP_C:
758 case URX_LOOP_DOT_I:
759 case URX_BACKSLASH_H:
760 case URX_BACKSLASH_R:
761 case URX_BACKSLASH_V:
762 // types with an integer operand field.
763 printf("%d", val);
764 break;
765
766 case URX_ONECHAR:
767 case URX_ONECHAR_I:
768 if (val < 0x20) {
769 printf("%#x", val);
770 } else {
771 printf("'%s'", CStr(UnicodeString(val))());
772 }
773 break;
774
775 case URX_STRING:
776 case URX_STRING_I:
777 {
778 int32_t lengthOp = fCompiledPat->elementAti(index+1);
779 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
780 int32_t length = URX_VAL(lengthOp);
781 UnicodeString str(fLiteralText, val, length);
782 printf("%s", CStr(str)());
783 }
784 break;
785
786 case URX_SETREF:
787 case URX_LOOP_SR_I:
788 {
789 UnicodeString s;
790 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
791 set->toPattern(s, TRUE);
792 printf("%s", CStr(s)());
793 }
794 break;
795
796 case URX_STATIC_SETREF:
797 case URX_STAT_SETREF_N:
798 {
799 UnicodeString s;
800 if (val & URX_NEG_SET) {
801 printf("NOT ");
802 val &= ~URX_NEG_SET;
803 }
804 UnicodeSet &set = RegexStaticSets::gStaticSets->fPropSets[val];
805 set.toPattern(s, TRUE);
806 printf("%s", CStr(s)());
807 }
808 break;
809
810
811 default:
812 printf("??????");
813 break;
814 }
815 printf("\n");
816#endif
817}
818
819
820void RegexPattern::dumpPattern() const {
821#if defined(REGEX_DEBUG)
822 int index;
823
824 UnicodeString patStr;
825 for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
826 patStr.append(c);
827 }
828 printf("Original Pattern: \"%s\"\n", CStr(patStr)());
829 printf(" Min Match Length: %d\n", fMinMatchLen);
830 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
831 if (fStartType == START_STRING) {
832 UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
833 printf(" Initial match string: \"%s\"\n", CStr(initialString)());
834 } else if (fStartType == START_SET) {
835 UnicodeString s;
836 fInitialChars->toPattern(s, TRUE);
837 printf(" Match First Chars: %s\n", CStr(s)());
838
839 } else if (fStartType == START_CHAR) {
840 printf(" First char of Match: ");
841 if (fInitialChar > 0x20) {
842 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
843 } else {
844 printf("%#x\n", fInitialChar);
845 }
846 }
847
848 printf("Named Capture Groups:\n");
849 if (!fNamedCaptureMap || uhash_count(fNamedCaptureMap) == 0) {
850 printf(" None\n");
851 } else {
852 int32_t pos = UHASH_FIRST;
853 const UHashElement *el = NULL;
854 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
855 const UnicodeString *name = (const UnicodeString *)el->key.pointer;
856 int32_t number = el->value.integer;
857 printf(" %d\t%s\n", number, CStr(*name)());
858 }
859 }
860
861 printf("\nIndex Binary Type Operand\n" \
862 "-------------------------------------------\n");
863 for (index = 0; index<fCompiledPat->size(); index++) {
864 dumpOp(index);
865 }
866 printf("\n\n");
867#endif
868}
869
870
871
872UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
873
874U_NAMESPACE_END
875#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
876