1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ********************************************************************** |
5 | * Copyright (c) 2002-2006, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** |
8 | */ |
9 | #include "unicode/usetiter.h" |
10 | #include "unicode/uniset.h" |
11 | #include "unicode/unistr.h" |
12 | #include "uvector.h" |
13 | |
14 | U_NAMESPACE_BEGIN |
15 | |
16 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSetIterator) |
17 | |
18 | /** |
19 | * Create an iterator |
20 | * @param set set to iterate over |
21 | */ |
22 | UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) { |
23 | cpString = nullptr; |
24 | reset(uSet); |
25 | } |
26 | |
27 | /** |
28 | * Create an iterator. Convenience for when the contents are to be set later. |
29 | */ |
30 | UnicodeSetIterator::UnicodeSetIterator() { |
31 | this->set = nullptr; |
32 | cpString = nullptr; |
33 | reset(); |
34 | } |
35 | |
36 | UnicodeSetIterator::~UnicodeSetIterator() { |
37 | delete cpString; |
38 | } |
39 | |
40 | /** |
41 | * Returns the next element in the set. |
42 | * @return true if there was another element in the set. |
43 | * if so, if codepoint == IS_STRING, the value is a string in the string field |
44 | * else the value is a single code point in the codepoint field. |
45 | * <br>You are guaranteed that the codepoints are in sorted order, and the strings are in sorted order, |
46 | * and that all code points are returned before any strings are returned. |
47 | * <br>Note also that the codepointEnd is undefined after calling this method. |
48 | */ |
49 | UBool UnicodeSetIterator::next() { |
50 | if (nextElement <= endElement) { |
51 | codepoint = codepointEnd = nextElement++; |
52 | string = nullptr; |
53 | return true; |
54 | } |
55 | if (range < endRange) { |
56 | loadRange(++range); |
57 | codepoint = codepointEnd = nextElement++; |
58 | string = nullptr; |
59 | return true; |
60 | } |
61 | |
62 | if (nextString >= stringCount) return false; |
63 | codepoint = (UChar32)IS_STRING; // signal that value is actually a string |
64 | string = (const UnicodeString*) set->strings->elementAt(nextString++); |
65 | return true; |
66 | } |
67 | |
68 | /** |
69 | * @return true if there was another element in the set. |
70 | * if so, if codepoint == IS_STRING, the value is a string in the string field |
71 | * else the value is a range of codepoints in the <codepoint, codepointEnd> fields. |
72 | * <br>Note that the codepoints are in sorted order, and the strings are in sorted order, |
73 | * and that all code points are returned before any strings are returned. |
74 | * <br>You are guaranteed that the ranges are in sorted order, and the strings are in sorted order, |
75 | * and that all ranges are returned before any strings are returned. |
76 | * <br>You are also guaranteed that ranges are disjoint and non-contiguous. |
77 | * <br>Note also that the codepointEnd is undefined after calling this method. |
78 | */ |
79 | UBool UnicodeSetIterator::() { |
80 | string = nullptr; |
81 | if (nextElement <= endElement) { |
82 | codepointEnd = endElement; |
83 | codepoint = nextElement; |
84 | nextElement = endElement+1; |
85 | return true; |
86 | } |
87 | if (range < endRange) { |
88 | loadRange(++range); |
89 | codepointEnd = endElement; |
90 | codepoint = nextElement; |
91 | nextElement = endElement+1; |
92 | return true; |
93 | } |
94 | |
95 | if (nextString >= stringCount) return false; |
96 | codepoint = (UChar32)IS_STRING; // signal that value is actually a string |
97 | string = (const UnicodeString*) set->strings->elementAt(nextString++); |
98 | return true; |
99 | } |
100 | |
101 | /** |
102 | *@param set the set to iterate over. This allows reuse of the iterator. |
103 | */ |
104 | void UnicodeSetIterator::reset(const UnicodeSet& uSet) { |
105 | this->set = &uSet; |
106 | reset(); |
107 | } |
108 | |
109 | /** |
110 | * Resets to the start, to allow the iteration to start over again. |
111 | */ |
112 | void UnicodeSetIterator::reset() { |
113 | if (set == nullptr) { |
114 | // Set up indices to empty iteration |
115 | endRange = -1; |
116 | stringCount = 0; |
117 | } else { |
118 | endRange = set->getRangeCount() - 1; |
119 | stringCount = set->stringsSize(); |
120 | } |
121 | range = 0; |
122 | endElement = -1; |
123 | nextElement = 0; |
124 | if (endRange >= 0) { |
125 | loadRange(range); |
126 | } |
127 | nextString = 0; |
128 | string = nullptr; |
129 | } |
130 | |
131 | void UnicodeSetIterator::loadRange(int32_t iRange) { |
132 | nextElement = set->getRangeStart(iRange); |
133 | endElement = set->getRangeEnd(iRange); |
134 | } |
135 | |
136 | |
137 | const UnicodeString& UnicodeSetIterator::getString() { |
138 | if (string==nullptr && codepoint!=(UChar32)IS_STRING) { |
139 | if (cpString == nullptr) { |
140 | cpString = new UnicodeString(); |
141 | } |
142 | if (cpString != nullptr) { |
143 | cpString->setTo((UChar32)codepoint); |
144 | } |
145 | string = cpString; |
146 | } |
147 | return *string; |
148 | } |
149 | |
150 | U_NAMESPACE_END |
151 | |
152 | //eof |
153 | |