| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ********************************************************************** | 
|---|
| 5 | * Copyright (c) 2003-2011, International Business Machines | 
|---|
| 6 | * Corporation and others.  All Rights Reserved. | 
|---|
| 7 | ********************************************************************** | 
|---|
| 8 | * Author: Alan Liu | 
|---|
| 9 | * Created: September 24 2003 | 
|---|
| 10 | * Since: ICU 2.8 | 
|---|
| 11 | ********************************************************************** | 
|---|
| 12 | */ | 
|---|
| 13 | #include "ruleiter.h" | 
|---|
| 14 | #include "unicode/parsepos.h" | 
|---|
| 15 | #include "unicode/symtable.h" | 
|---|
| 16 | #include "unicode/unistr.h" | 
|---|
| 17 | #include "unicode/utf16.h" | 
|---|
| 18 | #include "patternprops.h" | 
|---|
| 19 |  | 
|---|
/*
 * Maximum number of code units handed to unescapeAt() when next() parses a
 * backslash escape.  The longest forms are \U87654321 (10 code units) and a
 * surrogate pair written as \ud800\udc00 (12 code units).
 */
#define MAX_U_NOTATION_LEN 12
|---|
| 22 |  | 
|---|
| 23 | U_NAMESPACE_BEGIN | 
|---|
| 24 |  | 
|---|
| 25 | RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, | 
|---|
| 26 | ParsePosition& thePos) : | 
|---|
| 27 | text(theText), | 
|---|
| 28 | pos(thePos), | 
|---|
| 29 | sym(theSym), | 
|---|
| 30 | buf(0), | 
|---|
| 31 | bufPos(0) | 
|---|
| 32 | {} | 
|---|
| 33 |  | 
|---|
| 34 | UBool RuleCharacterIterator::atEnd() const { | 
|---|
| 35 | return buf == 0 && pos.getIndex() == text.length(); | 
|---|
| 36 | } | 
|---|
| 37 |  | 
|---|
| 38 | UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { | 
|---|
| 39 | if (U_FAILURE(ec)) return DONE; | 
|---|
| 40 |  | 
|---|
| 41 | UChar32 c = DONE; | 
|---|
| 42 | isEscaped = FALSE; | 
|---|
| 43 |  | 
|---|
| 44 | for (;;) { | 
|---|
| 45 | c = _current(); | 
|---|
| 46 | _advance(U16_LENGTH(c)); | 
|---|
| 47 |  | 
|---|
| 48 | if (c == SymbolTable::SYMBOL_REF && buf == 0 && | 
|---|
| 49 | (options & PARSE_VARIABLES) != 0 && sym != 0) { | 
|---|
| 50 | UnicodeString name = sym->parseReference(text, pos, text.length()); | 
|---|
| 51 | // If name is empty there was an isolated SYMBOL_REF; | 
|---|
| 52 | // return it.  Caller must be prepared for this. | 
|---|
| 53 | if (name.length() == 0) { | 
|---|
| 54 | break; | 
|---|
| 55 | } | 
|---|
| 56 | bufPos = 0; | 
|---|
| 57 | buf = sym->lookup(name); | 
|---|
| 58 | if (buf == 0) { | 
|---|
| 59 | ec = U_UNDEFINED_VARIABLE; | 
|---|
| 60 | return DONE; | 
|---|
| 61 | } | 
|---|
| 62 | // Handle empty variable value | 
|---|
| 63 | if (buf->length() == 0) { | 
|---|
| 64 | buf = 0; | 
|---|
| 65 | } | 
|---|
| 66 | continue; | 
|---|
| 67 | } | 
|---|
| 68 |  | 
|---|
| 69 | if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { | 
|---|
| 70 | continue; | 
|---|
| 71 | } | 
|---|
| 72 |  | 
|---|
| 73 | if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { | 
|---|
| 74 | UnicodeString tempEscape; | 
|---|
| 75 | int32_t offset = 0; | 
|---|
| 76 | c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); | 
|---|
| 77 | jumpahead(offset); | 
|---|
| 78 | isEscaped = TRUE; | 
|---|
| 79 | if (c < 0) { | 
|---|
| 80 | ec = U_MALFORMED_UNICODE_ESCAPE; | 
|---|
| 81 | return DONE; | 
|---|
| 82 | } | 
|---|
| 83 | } | 
|---|
| 84 |  | 
|---|
| 85 | break; | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | return c; | 
|---|
| 89 | } | 
|---|
| 90 |  | 
|---|
| 91 | void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { | 
|---|
| 92 | p.buf = buf; | 
|---|
| 93 | p.pos = pos.getIndex(); | 
|---|
| 94 | p.bufPos = bufPos; | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { | 
|---|
| 98 | buf = p.buf; | 
|---|
| 99 | pos.setIndex(p.pos); | 
|---|
| 100 | bufPos = p.bufPos; | 
|---|
| 101 | } | 
|---|
| 102 |  | 
|---|
| 103 | void RuleCharacterIterator::skipIgnored(int32_t options) { | 
|---|
| 104 | if ((options & SKIP_WHITESPACE) != 0) { | 
|---|
| 105 | for (;;) { | 
|---|
| 106 | UChar32 a = _current(); | 
|---|
| 107 | if (!PatternProps::isWhiteSpace(a)) break; | 
|---|
| 108 | _advance(U16_LENGTH(a)); | 
|---|
| 109 | } | 
|---|
| 110 | } | 
|---|
| 111 | } | 
|---|
| 112 |  | 
|---|
| 113 | UnicodeString& RuleCharacterIterator::(UnicodeString& result, int32_t maxLookAhead) const { | 
|---|
| 114 | if (maxLookAhead < 0) { | 
|---|
| 115 | maxLookAhead = 0x7FFFFFFF; | 
|---|
| 116 | } | 
|---|
| 117 | if (buf != 0) { | 
|---|
| 118 | buf->extract(bufPos, maxLookAhead, result); | 
|---|
| 119 | } else { | 
|---|
| 120 | text.extract(pos.getIndex(), maxLookAhead, result); | 
|---|
| 121 | } | 
|---|
| 122 | return result; | 
|---|
| 123 | } | 
|---|
| 124 |  | 
|---|
| 125 | void RuleCharacterIterator::jumpahead(int32_t count) { | 
|---|
| 126 | _advance(count); | 
|---|
| 127 | } | 
|---|
| 128 |  | 
|---|
| 129 | /* | 
|---|
| 130 | UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { | 
|---|
| 131 | int32_t b = pos.getIndex(); | 
|---|
| 132 | text.extract(0, b, result); | 
|---|
| 133 | return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index | 
|---|
| 134 | } | 
|---|
| 135 | */ | 
|---|
| 136 |  | 
|---|
| 137 | UChar32 RuleCharacterIterator::_current() const { | 
|---|
| 138 | if (buf != 0) { | 
|---|
| 139 | return buf->char32At(bufPos); | 
|---|
| 140 | } else { | 
|---|
| 141 | int i = pos.getIndex(); | 
|---|
| 142 | return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; | 
|---|
| 143 | } | 
|---|
| 144 | } | 
|---|
| 145 |  | 
|---|
| 146 | void RuleCharacterIterator::_advance(int32_t count) { | 
|---|
| 147 | if (buf != 0) { | 
|---|
| 148 | bufPos += count; | 
|---|
| 149 | if (bufPos == buf->length()) { | 
|---|
| 150 | buf = 0; | 
|---|
| 151 | } | 
|---|
| 152 | } else { | 
|---|
| 153 | pos.setIndex(pos.getIndex() + count); | 
|---|
| 154 | if (pos.getIndex() > text.length()) { | 
|---|
| 155 | pos.setIndex(text.length()); | 
|---|
| 156 | } | 
|---|
| 157 | } | 
|---|
| 158 | } | 
|---|
| 159 |  | 
|---|
| 160 | U_NAMESPACE_END | 
|---|
| 161 |  | 
|---|
| 162 | //eof | 
|---|
| 163 |  | 
|---|