| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ********************************************************************** | 
|---|
| 5 | * Copyright (c) 2002-2014, International Business Machines | 
|---|
| 6 | * Corporation and others.  All Rights Reserved. | 
|---|
| 7 | ********************************************************************** | 
|---|
| 8 | */ | 
|---|
| 9 | #ifndef USETITER_H | 
|---|
| 10 | #define USETITER_H | 
|---|
| 11 |  | 
|---|
| 12 | #include "unicode/utypes.h" | 
|---|
| 13 |  | 
|---|
| 14 | #if U_SHOW_CPLUSPLUS_API | 
|---|
| 15 |  | 
|---|
| 16 | #include "unicode/uobject.h" | 
|---|
| 17 | #include "unicode/unistr.h" | 
|---|
| 18 |  | 
|---|
| 19 | /** | 
|---|
| 20 | * \file | 
|---|
| 21 | * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet. | 
|---|
| 22 | */ | 
|---|
| 23 |  | 
|---|
| 24 | U_NAMESPACE_BEGIN | 
|---|
| 25 |  | 
|---|
| 26 | class UnicodeSet; | 
|---|
| 27 | class UnicodeString; | 
|---|
| 28 |  | 
|---|
| 29 | /** | 
|---|
| 30 | * | 
|---|
| 31 | * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It | 
|---|
| 32 | * iterates over either code points or code point ranges.  After all | 
|---|
| 33 | * code points or ranges have been returned, it returns the | 
|---|
| 34 | * multicharacter strings of the UnicodeSet, if any. | 
|---|
| 35 | * | 
|---|
| 36 | * This class is not intended to be subclassed.  Consider any fields | 
|---|
| 37 | *  or methods declared as "protected" to be private.  The use of | 
|---|
| 38 | *  protected in this class is an artifact of history. | 
|---|
| 39 | * | 
|---|
| 40 | * <p>To iterate over code points and strings, use a loop like this: | 
|---|
| 41 | * <pre> | 
|---|
| 42 | * UnicodeSetIterator it(set); | 
|---|
| 43 | * while (it.next()) { | 
|---|
| 44 | *     processItem(it.getString()); | 
|---|
| 45 | * } | 
|---|
| 46 | * </pre> | 
|---|
| 47 | * <p>Each item in the set is accessed as a string.  Set elements | 
|---|
| 48 | *    consisting of single code points are returned as strings containing | 
|---|
| 49 | *    just the one code point. | 
|---|
| 50 | * | 
|---|
| 51 | * <p>To iterate over code point ranges, instead of individual code points, | 
|---|
| 52 | *    use a loop like this: | 
|---|
| 53 | * <pre> | 
|---|
| 54 | * UnicodeSetIterator it(set); | 
|---|
| 55 | * while (it.nextRange()) { | 
|---|
| 56 | *   if (it.isString()) { | 
|---|
| 57 | *     processString(it.getString()); | 
|---|
| 58 | *   } else { | 
|---|
| 59 | *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd()); | 
|---|
| 60 | *   } | 
|---|
| 61 | * } | 
|---|
| 62 | * </pre> | 
|---|
| 63 | * @author M. Davis | 
|---|
| 64 | * @stable ICU 2.4 | 
|---|
| 65 | */ | 
|---|
| 66 | class U_COMMON_API UnicodeSetIterator : public UObject { | 
|---|
| 67 |  | 
|---|
| 68 | protected: | 
|---|
| 69 |  | 
|---|
| 70 | /** | 
|---|
| 71 | * Value of <tt>codepoint</tt> if the iterator points to a string. | 
|---|
| 72 | * If <tt>codepoint == IS_STRING</tt>, then examine | 
|---|
| 73 | * <tt>string</tt> for the current iteration result. | 
|---|
| 74 | * @stable ICU 2.4 | 
|---|
| 75 | */ | 
|---|
| 76 | enum { IS_STRING = -1 }; | 
|---|
| 77 |  | 
|---|
| 78 | /** | 
|---|
| 79 | * Current code point, or the special value <tt>IS_STRING</tt>, if | 
|---|
| 80 | * the iterator points to a string. | 
|---|
| 81 | * @stable ICU 2.4 | 
|---|
| 82 | */ | 
|---|
| 83 | UChar32 codepoint; | 
|---|
| 84 |  | 
|---|
| 85 | /** | 
|---|
| 86 | * When iterating over ranges using <tt>nextRange()</tt>, | 
|---|
| 87 | * <tt>codepointEnd</tt> contains the inclusive end of the | 
|---|
| 88 | * iteration range, if <tt>codepoint != IS_STRING</tt>.  If | 
|---|
| 89 | * iterating over code points using <tt>next()</tt>, or if | 
|---|
| 90 | * <tt>codepoint == IS_STRING</tt>, then the value of | 
|---|
| 91 | * <tt>codepointEnd</tt> is undefined. | 
|---|
| 92 | * @stable ICU 2.4 | 
|---|
| 93 | */ | 
|---|
| 94 | UChar32 codepointEnd; | 
|---|
| 95 |  | 
|---|
| 96 | /** | 
|---|
| 97 | * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points | 
|---|
| 98 | * to the current string.  If <tt>codepoint != IS_STRING</tt>, the | 
|---|
| 99 | * value of <tt>string</tt> is undefined. | 
|---|
| 100 | * @stable ICU 2.4 | 
|---|
| 101 | */ | 
|---|
| 102 | const UnicodeString* string; | 
|---|
| 103 |  | 
|---|
| 104 | public: | 
|---|
| 105 |  | 
|---|
| 106 | /** | 
|---|
| 107 | * Create an iterator over the given set.  The iterator is valid | 
|---|
| 108 | * only so long as <tt>set</tt> is valid. | 
|---|
| 109 | * @param set set to iterate over | 
|---|
| 110 | * @stable ICU 2.4 | 
|---|
| 111 | */ | 
|---|
| 112 | UnicodeSetIterator(const UnicodeSet& set); | 
|---|
| 113 |  | 
|---|
| 114 | /** | 
|---|
| 115 | * Create an iterator over nothing.  <tt>next()</tt> and | 
|---|
| 116 | * <tt>nextRange()</tt> return false. This is a convenience | 
|---|
| 117 | * constructor allowing the target to be set later. | 
|---|
| 118 | * @stable ICU 2.4 | 
|---|
| 119 | */ | 
|---|
| 120 | UnicodeSetIterator(); | 
|---|
| 121 |  | 
|---|
| 122 | /** | 
|---|
| 123 | * Destructor. | 
|---|
| 124 | * @stable ICU 2.4 | 
|---|
| 125 | */ | 
|---|
| 126 | virtual ~UnicodeSetIterator(); | 
|---|
| 127 |  | 
|---|
| 128 | /** | 
|---|
| 129 | * Returns true if the current element is a string.  If so, the | 
|---|
| 130 | * caller can retrieve it with <tt>getString()</tt>.  If this | 
|---|
| 131 | * method returns false, the current element is a code point or | 
|---|
| 132 | * code point range, depending on whether <tt>next()</tt> or | 
|---|
| 133 | * <tt>nextRange()</tt> was called. | 
|---|
| 134 | * Elements of types string and codepoint can both be retrieved | 
|---|
| 135 | * with the function <tt>getString()</tt>. | 
|---|
| 136 | * Elements of type codepoint can also be retrieved with | 
|---|
| 137 | * <tt>getCodepoint()</tt>. | 
|---|
| 138 | * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint | 
|---|
| 139 | * of the range, and <tt>getCodepointEnd()</tt> returns the end | 
|---|
| 140 | * of the range. | 
|---|
| 141 | * @stable ICU 2.4 | 
|---|
| 142 | */ | 
|---|
| 143 | inline UBool isString() const; | 
|---|
| 144 |  | 
|---|
| 145 | /** | 
|---|
| 146 | * Returns the current code point, if <tt>isString()</tt> returned | 
|---|
| 147 | * false.  Otherwise returns an undefined result. | 
|---|
| 148 | * @stable ICU 2.4 | 
|---|
| 149 | */ | 
|---|
| 150 | inline UChar32 getCodepoint() const; | 
|---|
| 151 |  | 
|---|
| 152 | /** | 
|---|
| 153 | * Returns the end of the current code point range, if | 
|---|
| 154 | * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was | 
|---|
| 155 | * called.  Otherwise returns an undefined result. | 
|---|
| 156 | * @stable ICU 2.4 | 
|---|
| 157 | */ | 
|---|
| 158 | inline UChar32 getCodepointEnd() const; | 
|---|
| 159 |  | 
|---|
| 160 | /** | 
|---|
| 161 | * Returns the current string, if <tt>isString()</tt> returned | 
|---|
| 162 | * true.  If the current iteration item is a code point, a UnicodeString | 
|---|
| 163 | * containing that single code point is returned. | 
|---|
| 164 | * | 
|---|
| 165 | * Ownership of the returned string remains with the iterator. | 
|---|
| 166 | * The string is guaranteed to remain valid only until the iterator is | 
|---|
| 167 | *   advanced to the next item, or until the iterator is deleted. | 
|---|
| 168 | * | 
|---|
| 169 | * @stable ICU 2.4 | 
|---|
| 170 | */ | 
|---|
| 171 | const UnicodeString& getString(); | 
|---|
| 172 |  | 
|---|
| 173 | /** | 
|---|
| 174 | * Advances the iteration position to the next element in the set, | 
|---|
| 175 | * which can be either a single code point or a string. | 
|---|
| 176 | * If there are no more elements in the set, return false. | 
|---|
| 177 | * | 
|---|
| 178 | * <p> | 
|---|
| 179 | * If <tt>isString() == TRUE</tt>, the value is a | 
|---|
| 180 | * string, otherwise the value is a | 
|---|
| 181 | * single code point.  Elements of either type can be retrieved | 
|---|
| 182 | * with the function <tt>getString()</tt>, while elements of | 
|---|
| 183 | * consisting of a single code point can be retrieved with | 
|---|
| 184 | * <tt>getCodepoint()</tt> | 
|---|
| 185 | * | 
|---|
| 186 | * <p>The order of iteration is all code points in sorted order, | 
|---|
| 187 | * followed by all strings sorted order.    Do not mix | 
|---|
| 188 | * calls to <tt>next()</tt> and <tt>nextRange()</tt> without | 
|---|
| 189 | * calling <tt>reset()</tt> between them.  The results of doing so | 
|---|
| 190 | * are undefined. | 
|---|
| 191 | * | 
|---|
| 192 | * @return true if there was another element in the set. | 
|---|
| 193 | * @stable ICU 2.4 | 
|---|
| 194 | */ | 
|---|
| 195 | UBool next(); | 
|---|
| 196 |  | 
|---|
| 197 | /** | 
|---|
| 198 | * Returns the next element in the set, either a code point range | 
|---|
| 199 | * or a string.  If there are no more elements in the set, return | 
|---|
| 200 | * false.  If <tt>isString() == TRUE</tt>, the value is a | 
|---|
| 201 | * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a | 
|---|
| 202 | * range of one or more code points from <tt>getCodepoint()</tt> to | 
|---|
| 203 | * <tt>getCodepointeEnd()</tt> inclusive. | 
|---|
| 204 | * | 
|---|
| 205 | * <p>The order of iteration is all code points ranges in sorted | 
|---|
| 206 | * order, followed by all strings sorted order.  Ranges are | 
|---|
| 207 | * disjoint and non-contiguous.  The value returned from <tt>getString()</tt> | 
|---|
| 208 | * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to | 
|---|
| 209 | * <tt>next()</tt> and <tt>nextRange()</tt> without calling | 
|---|
| 210 | * <tt>reset()</tt> between them.  The results of doing so are | 
|---|
| 211 | * undefined. | 
|---|
| 212 | * | 
|---|
| 213 | * @return true if there was another element in the set. | 
|---|
| 214 | * @stable ICU 2.4 | 
|---|
| 215 | */ | 
|---|
| 216 | UBool (); | 
|---|
| 217 |  | 
|---|
| 218 | /** | 
|---|
| 219 | * Sets this iterator to visit the elements of the given set and | 
|---|
| 220 | * resets it to the start of that set.  The iterator is valid only | 
|---|
| 221 | * so long as <tt>set</tt> is valid. | 
|---|
| 222 | * @param set the set to iterate over. | 
|---|
| 223 | * @stable ICU 2.4 | 
|---|
| 224 | */ | 
|---|
| 225 | void reset(const UnicodeSet& set); | 
|---|
| 226 |  | 
|---|
| 227 | /** | 
|---|
| 228 | * Resets this iterator to the start of the set. | 
|---|
| 229 | * @stable ICU 2.4 | 
|---|
| 230 | */ | 
|---|
| 231 | void reset(); | 
|---|
| 232 |  | 
|---|
| 233 | /** | 
|---|
| 234 | * ICU "poor man's RTTI", returns a UClassID for this class. | 
|---|
| 235 | * | 
|---|
| 236 | * @stable ICU 2.4 | 
|---|
| 237 | */ | 
|---|
| 238 | static UClassID U_EXPORT2 getStaticClassID(); | 
|---|
| 239 |  | 
|---|
| 240 | /** | 
|---|
| 241 | * ICU "poor man's RTTI", returns a UClassID for the actual class. | 
|---|
| 242 | * | 
|---|
| 243 | * @stable ICU 2.4 | 
|---|
| 244 | */ | 
|---|
| 245 | virtual UClassID getDynamicClassID() const; | 
|---|
| 246 |  | 
|---|
| 247 | // ======================= PRIVATES =========================== | 
|---|
| 248 |  | 
|---|
| 249 | protected: | 
|---|
| 250 |  | 
|---|
| 251 | // endElement and nextElements are really UChar32's, but we keep | 
|---|
| 252 | // them as signed int32_t's so we can do comparisons with | 
|---|
| 253 | // endElement set to -1.  Leave them as int32_t's. | 
|---|
| 254 | /** The set | 
|---|
| 255 | * @stable ICU 2.4 | 
|---|
| 256 | */ | 
|---|
| 257 | const UnicodeSet* set; | 
|---|
| 258 | /** End range | 
|---|
| 259 | * @stable ICU 2.4 | 
|---|
| 260 | */ | 
|---|
| 261 | int32_t endRange; | 
|---|
| 262 | /** Range | 
|---|
| 263 | * @stable ICU 2.4 | 
|---|
| 264 | */ | 
|---|
| 265 | int32_t range; | 
|---|
| 266 | /** End element | 
|---|
| 267 | * @stable ICU 2.4 | 
|---|
| 268 | */ | 
|---|
| 269 | int32_t endElement; | 
|---|
| 270 | /** Next element | 
|---|
| 271 | * @stable ICU 2.4 | 
|---|
| 272 | */ | 
|---|
| 273 | int32_t nextElement; | 
|---|
| 274 | //UBool abbreviated; | 
|---|
| 275 | /** Next string | 
|---|
| 276 | * @stable ICU 2.4 | 
|---|
| 277 | */ | 
|---|
| 278 | int32_t nextString; | 
|---|
| 279 | /** String count | 
|---|
| 280 | * @stable ICU 2.4 | 
|---|
| 281 | */ | 
|---|
| 282 | int32_t stringCount; | 
|---|
| 283 |  | 
|---|
| 284 | /** | 
|---|
| 285 | *  Points to the string to use when the caller asks for a | 
|---|
| 286 | *  string and the current iteration item is a code point, not a string. | 
|---|
| 287 | *  @internal | 
|---|
| 288 | */ | 
|---|
| 289 | UnicodeString *cpString; | 
|---|
| 290 |  | 
|---|
| 291 | /** Copy constructor. Disallowed. | 
|---|
| 292 | * @stable ICU 2.4 | 
|---|
| 293 | */ | 
|---|
| 294 | UnicodeSetIterator(const UnicodeSetIterator&); // disallow | 
|---|
| 295 |  | 
|---|
| 296 | /** Assignment operator. Disallowed. | 
|---|
| 297 | * @stable ICU 2.4 | 
|---|
| 298 | */ | 
|---|
| 299 | UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow | 
|---|
| 300 |  | 
|---|
| 301 | /** Load range | 
|---|
| 302 | * @stable ICU 2.4 | 
|---|
| 303 | */ | 
|---|
| 304 | virtual void loadRange(int32_t range); | 
|---|
| 305 |  | 
|---|
| 306 | }; | 
|---|
| 307 |  | 
|---|
| 308 | inline UBool UnicodeSetIterator::isString() const { | 
|---|
| 309 | return codepoint == (UChar32)IS_STRING; | 
|---|
| 310 | } | 
|---|
| 311 |  | 
|---|
| 312 | inline UChar32 UnicodeSetIterator::getCodepoint() const { | 
|---|
| 313 | return codepoint; | 
|---|
| 314 | } | 
|---|
| 315 |  | 
|---|
| 316 | inline UChar32 UnicodeSetIterator::getCodepointEnd() const { | 
|---|
| 317 | return codepointEnd; | 
|---|
| 318 | } | 
|---|
| 319 |  | 
|---|
| 320 |  | 
|---|
| 321 | U_NAMESPACE_END | 
|---|
| 322 |  | 
|---|
| 323 | #endif /* U_SHOW_CPLUSPLUS_API */ | 
|---|
| 324 |  | 
|---|
| 325 | #endif | 
|---|
| 326 |  | 
|---|