1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 2010-2011, International Business Machines
6* Corporation and others. All Rights Reserved.
7*******************************************************************************
8* file name: ucharstrieiterator.h
9* encoding: UTF-8
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2010nov15
14* created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
18#include "unicode/ucharstrie.h"
19#include "unicode/unistr.h"
20#include "uvectr32.h"
21
22U_NAMESPACE_BEGIN
23
24UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength,
25 UErrorCode &errorCode)
26 : uchars_(trieUChars),
27 pos_(uchars_), initialPos_(uchars_),
28 remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
29 skipValue_(false),
30 maxLength_(maxStringLength), value_(0), stack_(nullptr) {
31 if(U_FAILURE(errorCode)) {
32 return;
33 }
34 // stack_ is a pointer so that it's easy to turn ucharstrie.h into
35 // a public API header for which we would want it to depend only on
36 // other public headers.
37 // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
38 // via the UnicodeString and UVector32 implementations, so this additional
39 // cost is minimal.
40 stack_=new UVector32(errorCode);
41 if(stack_==nullptr) {
42 errorCode=U_MEMORY_ALLOCATION_ERROR;
43 }
44}
45
46UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
47 UErrorCode &errorCode)
48 : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
49 remainingMatchLength_(trie.remainingMatchLength_),
50 initialRemainingMatchLength_(trie.remainingMatchLength_),
51 skipValue_(false),
52 maxLength_(maxStringLength), value_(0), stack_(nullptr) {
53 if(U_FAILURE(errorCode)) {
54 return;
55 }
56 stack_=new UVector32(errorCode);
57 if(U_FAILURE(errorCode)) {
58 return;
59 }
60 if(stack_==nullptr) {
61 errorCode=U_MEMORY_ALLOCATION_ERROR;
62 return;
63 }
64 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
65 if(length>=0) {
66 // Pending linear-match node, append remaining UChars to str_.
67 ++length;
68 if(maxLength_>0 && length>maxLength_) {
69 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
70 }
71 str_.append(pos_, length);
72 pos_+=length;
73 remainingMatchLength_-=length;
74 }
75}
76
77UCharsTrie::Iterator::~Iterator() {
78 delete stack_;
79}
80
81UCharsTrie::Iterator &
82UCharsTrie::Iterator::reset() {
83 pos_=initialPos_;
84 remainingMatchLength_=initialRemainingMatchLength_;
85 skipValue_=false;
86 int32_t length=remainingMatchLength_+1; // Remaining match length.
87 if(maxLength_>0 && length>maxLength_) {
88 length=maxLength_;
89 }
90 str_.truncate(length);
91 pos_+=length;
92 remainingMatchLength_-=length;
93 stack_->setSize(0);
94 return *this;
95}
96
97UBool
98UCharsTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }
99
100UBool
101UCharsTrie::Iterator::next(UErrorCode &errorCode) {
102 if(U_FAILURE(errorCode)) {
103 return false;
104 }
105 const char16_t *pos=pos_;
106 if(pos==nullptr) {
107 if(stack_->isEmpty()) {
108 return false;
109 }
110 // Pop the state off the stack and continue with the next outbound edge of
111 // the branch node.
112 int32_t stackSize=stack_->size();
113 int32_t length=stack_->elementAti(stackSize-1);
114 pos=uchars_+stack_->elementAti(stackSize-2);
115 stack_->setSize(stackSize-2);
116 str_.truncate(length&0xffff);
117 length=(int32_t)((uint32_t)length>>16);
118 if(length>1) {
119 pos=branchNext(pos, length, errorCode);
120 if(pos==nullptr) {
121 return true; // Reached a final value.
122 }
123 } else {
124 str_.append(*pos++);
125 }
126 }
127 if(remainingMatchLength_>=0) {
128 // We only get here if we started in a pending linear-match node
129 // with more than maxLength remaining units.
130 return truncateAndStop();
131 }
132 for(;;) {
133 int32_t node=*pos++;
134 if(node>=kMinValueLead) {
135 if(skipValue_) {
136 pos=skipNodeValue(pos, node);
137 node&=kNodeTypeMask;
138 skipValue_=false;
139 } else {
140 // Deliver value for the string so far.
141 UBool isFinal=(UBool)(node>>15);
142 if(isFinal) {
143 value_=readValue(pos, node&0x7fff);
144 } else {
145 value_=readNodeValue(pos, node);
146 }
147 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
148 pos_=nullptr;
149 } else {
150 // We cannot skip the value right here because it shares its
151 // lead unit with a match node which we have to evaluate
152 // next time.
153 // Instead, keep pos_ on the node lead unit itself.
154 pos_=pos-1;
155 skipValue_=true;
156 }
157 return true;
158 }
159 }
160 if(maxLength_>0 && str_.length()==maxLength_) {
161 return truncateAndStop();
162 }
163 if(node<kMinLinearMatch) {
164 if(node==0) {
165 node=*pos++;
166 }
167 pos=branchNext(pos, node+1, errorCode);
168 if(pos==nullptr) {
169 return true; // Reached a final value.
170 }
171 } else {
172 // Linear-match node, append length units to str_.
173 int32_t length=node-kMinLinearMatch+1;
174 if(maxLength_>0 && str_.length()+length>maxLength_) {
175 str_.append(pos, maxLength_-str_.length());
176 return truncateAndStop();
177 }
178 str_.append(pos, length);
179 pos+=length;
180 }
181 }
182}
183
184// Branch node, needs to take the first outbound edge and push state for the rest.
185const char16_t *
186UCharsTrie::Iterator::branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode) {
187 while(length>kMaxBranchLinearSubNodeLength) {
188 ++pos; // ignore the comparison unit
189 // Push state for the greater-or-equal edge.
190 stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
191 stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
192 // Follow the less-than edge.
193 length>>=1;
194 pos=jumpByDelta(pos);
195 }
196 // List of key-value pairs where values are either final values or jump deltas.
197 // Read the first (key, value) pair.
198 char16_t trieUnit=*pos++;
199 int32_t node=*pos++;
200 UBool isFinal=(UBool)(node>>15);
201 int32_t value=readValue(pos, node&=0x7fff);
202 pos=skipValue(pos, node);
203 stack_->addElement((int32_t)(pos-uchars_), errorCode);
204 stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
205 str_.append(trieUnit);
206 if(isFinal) {
207 pos_=nullptr;
208 value_=value;
209 return nullptr;
210 } else {
211 return pos+value;
212 }
213}
214
215U_NAMESPACE_END
216