1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | *************************************************************************** |
5 | * Copyright (C) 2002-2016 International Business Machines Corporation * |
6 | * and others. All rights reserved. * |
7 | *************************************************************************** |
8 | */ |
9 | |
10 | // |
11 | // File: rbbinode.cpp |
12 | // |
13 | // Implementation of class RBBINode, which represents a node in the |
14 | // tree generated when parsing the Rules Based Break Iterator rules. |
15 | // |
16 | // This "Class" is actually closer to a struct. |
17 | // Code using it is expected to directly access fields much of the time. |
18 | // |
19 | |
20 | #include "unicode/utypes.h" |
21 | |
22 | #if !UCONFIG_NO_BREAK_ITERATION |
23 | |
24 | #include "unicode/unistr.h" |
25 | #include "unicode/uniset.h" |
26 | #include "unicode/uchar.h" |
27 | #include "unicode/parsepos.h" |
28 | |
29 | #include "cstr.h" |
30 | #include "uvector.h" |
31 | |
32 | #include "rbbirb.h" |
33 | #include "rbbinode.h" |
34 | |
35 | #include "uassert.h" |
36 | |
37 | |
38 | U_NAMESPACE_BEGIN |
39 | |
40 | #ifdef RBBI_DEBUG |
41 | static int gLastSerial = 0; |
42 | #endif |
43 | |
44 | |
45 | //------------------------------------------------------------------------- |
46 | // |
47 | // Constructor. Just set the fields to reasonable default values. |
48 | // |
49 | //------------------------------------------------------------------------- |
50 | RBBINode::RBBINode(NodeType t) : UMemory() { |
51 | #ifdef RBBI_DEBUG |
52 | fSerialNum = ++gLastSerial; |
53 | #endif |
54 | fType = t; |
55 | fParent = NULL; |
56 | fLeftChild = NULL; |
57 | fRightChild = NULL; |
58 | fInputSet = NULL; |
59 | fFirstPos = 0; |
60 | fLastPos = 0; |
61 | fNullable = FALSE; |
62 | fLookAheadEnd = FALSE; |
63 | fRuleRoot = FALSE; |
64 | fChainIn = FALSE; |
65 | fVal = 0; |
66 | fPrecedence = precZero; |
67 | |
68 | UErrorCode status = U_ZERO_ERROR; |
69 | fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere |
70 | fLastPosSet = new UVector(status); |
71 | fFollowPos = new UVector(status); |
72 | if (t==opCat) {fPrecedence = precOpCat;} |
73 | else if (t==opOr) {fPrecedence = precOpOr;} |
74 | else if (t==opStart) {fPrecedence = precStart;} |
75 | else if (t==opLParen) {fPrecedence = precLParen;} |
76 | |
77 | } |
78 | |
79 | |
80 | RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { |
81 | #ifdef RBBI_DEBUG |
82 | fSerialNum = ++gLastSerial; |
83 | #endif |
84 | fType = other.fType; |
85 | fParent = NULL; |
86 | fLeftChild = NULL; |
87 | fRightChild = NULL; |
88 | fInputSet = other.fInputSet; |
89 | fPrecedence = other.fPrecedence; |
90 | fText = other.fText; |
91 | fFirstPos = other.fFirstPos; |
92 | fLastPos = other.fLastPos; |
93 | fNullable = other.fNullable; |
94 | fVal = other.fVal; |
95 | fRuleRoot = FALSE; |
96 | fChainIn = other.fChainIn; |
97 | UErrorCode status = U_ZERO_ERROR; |
98 | fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere |
99 | fLastPosSet = new UVector(status); |
100 | fFollowPos = new UVector(status); |
101 | } |
102 | |
103 | |
104 | //------------------------------------------------------------------------- |
105 | // |
106 | // Destructor. Deletes both this node AND any child nodes, |
107 | // except in the case of variable reference nodes. For |
108 | // these, the l. child points back to the definition, which |
109 | // is common for all references to the variable, meaning |
110 | // it can't be deleted here. |
111 | // |
112 | //------------------------------------------------------------------------- |
113 | RBBINode::~RBBINode() { |
114 | // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); |
115 | delete fInputSet; |
116 | fInputSet = NULL; |
117 | |
118 | switch (this->fType) { |
119 | case varRef: |
120 | case setRef: |
121 | // for these node types, multiple instances point to the same "children" |
122 | // Storage ownership of children handled elsewhere. Don't delete here. |
123 | break; |
124 | |
125 | default: |
126 | delete fLeftChild; |
127 | fLeftChild = NULL; |
128 | delete fRightChild; |
129 | fRightChild = NULL; |
130 | } |
131 | |
132 | |
133 | delete fFirstPosSet; |
134 | delete fLastPosSet; |
135 | delete fFollowPos; |
136 | |
137 | } |
138 | |
139 | |
140 | //------------------------------------------------------------------------- |
141 | // |
142 | // cloneTree Make a copy of the subtree rooted at this node. |
143 | // Discard any variable references encountered along the way, |
144 | // and replace with copies of the variable's definitions. |
145 | // Used to replicate the expression underneath variable |
146 | // references in preparation for generating the DFA tables. |
147 | // |
148 | //------------------------------------------------------------------------- |
149 | RBBINode *RBBINode::cloneTree() { |
150 | RBBINode *n; |
151 | |
152 | if (fType == RBBINode::varRef) { |
153 | // If the current node is a variable reference, skip over it |
154 | // and clone the definition of the variable instead. |
155 | n = fLeftChild->cloneTree(); |
156 | } else if (fType == RBBINode::uset) { |
157 | n = this; |
158 | } else { |
159 | n = new RBBINode(*this); |
160 | // Check for null pointer. |
161 | if (n != NULL) { |
162 | if (fLeftChild != NULL) { |
163 | n->fLeftChild = fLeftChild->cloneTree(); |
164 | n->fLeftChild->fParent = n; |
165 | } |
166 | if (fRightChild != NULL) { |
167 | n->fRightChild = fRightChild->cloneTree(); |
168 | n->fRightChild->fParent = n; |
169 | } |
170 | } |
171 | } |
172 | return n; |
173 | } |
174 | |
175 | |
176 | |
177 | //------------------------------------------------------------------------- |
178 | // |
179 | // flattenVariables Walk a parse tree, replacing any variable |
180 | // references with a copy of the variable's definition. |
181 | // Aside from variables, the tree is not changed. |
182 | // |
183 | // Return the root of the tree. If the root was not a variable |
184 | // reference, it remains unchanged - the root we started with |
185 | // is the root we return. If, however, the root was a variable |
186 | // reference, the root of the newly cloned replacement tree will |
187 | // be returned, and the original tree deleted. |
188 | // |
189 | // This function works by recursively walking the tree |
190 | // without doing anything until a variable reference is |
191 | // found, then calling cloneTree() at that point. Any |
192 | // nested references are handled by cloneTree(), not here. |
193 | // |
194 | //------------------------------------------------------------------------- |
195 | RBBINode *RBBINode::flattenVariables() { |
196 | if (fType == varRef) { |
197 | RBBINode *retNode = fLeftChild->cloneTree(); |
198 | if (retNode != NULL) { |
199 | retNode->fRuleRoot = this->fRuleRoot; |
200 | retNode->fChainIn = this->fChainIn; |
201 | } |
202 | delete this; // TODO: undefined behavior. Fix. |
203 | return retNode; |
204 | } |
205 | |
206 | if (fLeftChild != NULL) { |
207 | fLeftChild = fLeftChild->flattenVariables(); |
208 | fLeftChild->fParent = this; |
209 | } |
210 | if (fRightChild != NULL) { |
211 | fRightChild = fRightChild->flattenVariables(); |
212 | fRightChild->fParent = this; |
213 | } |
214 | return this; |
215 | } |
216 | |
217 | |
218 | //------------------------------------------------------------------------- |
219 | // |
220 | // flattenSets Walk the parse tree, replacing any nodes of type setRef |
221 | // with a copy of the expression tree for the set. A set's |
222 | // equivalent expression tree is precomputed and saved as |
223 | // the left child of the uset node. |
224 | // |
225 | //------------------------------------------------------------------------- |
226 | void RBBINode::flattenSets() { |
227 | U_ASSERT(fType != setRef); |
228 | |
229 | if (fLeftChild != NULL) { |
230 | if (fLeftChild->fType==setRef) { |
231 | RBBINode *setRefNode = fLeftChild; |
232 | RBBINode *usetNode = setRefNode->fLeftChild; |
233 | RBBINode *replTree = usetNode->fLeftChild; |
234 | fLeftChild = replTree->cloneTree(); |
235 | fLeftChild->fParent = this; |
236 | delete setRefNode; |
237 | } else { |
238 | fLeftChild->flattenSets(); |
239 | } |
240 | } |
241 | |
242 | if (fRightChild != NULL) { |
243 | if (fRightChild->fType==setRef) { |
244 | RBBINode *setRefNode = fRightChild; |
245 | RBBINode *usetNode = setRefNode->fLeftChild; |
246 | RBBINode *replTree = usetNode->fLeftChild; |
247 | fRightChild = replTree->cloneTree(); |
248 | fRightChild->fParent = this; |
249 | delete setRefNode; |
250 | } else { |
251 | fRightChild->flattenSets(); |
252 | } |
253 | } |
254 | } |
255 | |
256 | |
257 | |
258 | //------------------------------------------------------------------------- |
259 | // |
260 | // findNodes() Locate all the nodes of the specified type, starting |
261 | // at the specified root. |
262 | // |
263 | //------------------------------------------------------------------------- |
264 | void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { |
265 | /* test for buffer overflows */ |
266 | if (U_FAILURE(status)) { |
267 | return; |
268 | } |
269 | if (fType == kind) { |
270 | dest->addElement(this, status); |
271 | } |
272 | if (fLeftChild != NULL) { |
273 | fLeftChild->findNodes(dest, kind, status); |
274 | } |
275 | if (fRightChild != NULL) { |
276 | fRightChild->findNodes(dest, kind, status); |
277 | } |
278 | } |
279 | |
280 | |
281 | //------------------------------------------------------------------------- |
282 | // |
283 | // print. Print out a single node, for debugging. |
284 | // |
285 | //------------------------------------------------------------------------- |
286 | #ifdef RBBI_DEBUG |
287 | |
288 | static int32_t serial(const RBBINode *node) { |
289 | return (node == NULL? -1 : node->fSerialNum); |
290 | } |
291 | |
292 | |
293 | void RBBINode::printNode(const RBBINode *node) { |
294 | static const char * const nodeTypeNames[] = { |
295 | "setRef" , |
296 | "uset" , |
297 | "varRef" , |
298 | "leafChar" , |
299 | "lookAhead" , |
300 | "tag" , |
301 | "endMark" , |
302 | "opStart" , |
303 | "opCat" , |
304 | "opOr" , |
305 | "opStar" , |
306 | "opPlus" , |
307 | "opQuestion" , |
308 | "opBreak" , |
309 | "opReverse" , |
310 | "opLParen" |
311 | }; |
312 | |
313 | if (node==NULL) { |
314 | RBBIDebugPrintf("%10p" , (void *)node); |
315 | } else { |
316 | RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d " , |
317 | (void *)node, node->fSerialNum, nodeTypeNames[node->fType], |
318 | node->fRuleRoot?'R':' ', node->fChainIn?'C':' ', |
319 | serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent), |
320 | node->fFirstPos, node->fVal); |
321 | if (node->fType == varRef) { |
322 | RBBI_DEBUG_printUnicodeString(node->fText); |
323 | } |
324 | } |
325 | RBBIDebugPrintf("\n" ); |
326 | } |
327 | #endif |
328 | |
329 | |
330 | #ifdef RBBI_DEBUG |
331 | U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) { |
332 | RBBIDebugPrintf("%*s" , minWidth, CStr(s)()); |
333 | } |
334 | #endif |
335 | |
336 | |
337 | //------------------------------------------------------------------------- |
338 | // |
339 | // print. Print out the tree of nodes rooted at "this" |
340 | // |
341 | //------------------------------------------------------------------------- |
342 | #ifdef RBBI_DEBUG |
343 | void RBBINode::printNodeHeader() { |
344 | RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n" ); |
345 | } |
346 | |
347 | void RBBINode::printTree(const RBBINode *node, UBool printHeading) { |
348 | if (printHeading) { |
349 | printNodeHeader(); |
350 | } |
351 | printNode(node); |
352 | if (node != NULL) { |
353 | // Only dump the definition under a variable reference if asked to. |
354 | // Unconditinally dump children of all other node types. |
355 | if (node->fType != varRef) { |
356 | if (node->fLeftChild != NULL) { |
357 | printTree(node->fLeftChild, FALSE); |
358 | } |
359 | |
360 | if (node->fRightChild != NULL) { |
361 | printTree(node->fRightChild, FALSE); |
362 | } |
363 | } |
364 | } |
365 | } |
366 | #endif |
367 | |
368 | |
369 | |
370 | U_NAMESPACE_END |
371 | |
372 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
373 | |