1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | // |
4 | // rbbitblb.h |
5 | // |
6 | |
7 | /* |
8 | ********************************************************************** |
9 | * Copyright (c) 2002-2016, International Business Machines |
10 | * Corporation and others. All Rights Reserved. |
11 | ********************************************************************** |
12 | */ |
13 | |
14 | #ifndef RBBITBLB_H |
15 | #define RBBITBLB_H |
16 | |
17 | #include "unicode/utypes.h" |
18 | |
19 | #if !UCONFIG_NO_BREAK_ITERATION |
20 | |
21 | #include "unicode/uobject.h" |
22 | #include "unicode/rbbi.h" |
23 | #include "rbbirb.h" |
24 | #include "rbbinode.h" |
25 | |
26 | |
27 | U_NAMESPACE_BEGIN |
28 | |
29 | class RBBIRuleScanner; |
30 | class RBBIRuleBuilder; |
31 | class UVector32; |
32 | |
33 | // |
34 | // class RBBITableBuilder is part of the RBBI rule compiler. |
35 | // It builds the state transition table used by the RBBI runtime |
36 | // from the expression syntax tree generated by the rule scanner. |
37 | // |
38 | // This class is part of the RBBI implementation only. |
39 | // There is no user-visible public API here. |
40 | // |
41 | |
42 | class RBBITableBuilder : public UMemory { |
43 | public: |
44 | RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status); |
45 | ~RBBITableBuilder(); |
46 | |
47 | void buildForwardTable(); |
48 | |
49 | /** Return the runtime size in bytes of the built state table. */ |
50 | int32_t getTableSize() const; |
51 | |
52 | /** Fill in the runtime state table. Sufficient memory must exist at the specified location. |
53 | */ |
54 | void exportTable(void *where); |
55 | |
56 | /** |
57 | * Find duplicate (redundant) character classes. Begin looking with categories.first. |
58 | * Duplicate, if found are returned in the categories parameter. |
59 | * This is an iterator-like function, used to identify character classes |
60 | * (state table columns) that can be eliminated. |
61 | * @param categories in/out parameter, specifies where to start looking for duplicates, |
62 | * and returns the first pair of duplicates found, if any. |
63 | * @return true if duplicate char classes were found, false otherwise. |
64 | */ |
65 | bool findDuplCharClassFrom(IntPair *categories); |
66 | |
67 | /** Remove a column from the state table. Used when two character categories |
68 | * have been found equivalent, and merged together, to eliminate the uneeded table column. |
69 | */ |
70 | void removeColumn(int32_t column); |
71 | |
72 | /** |
73 | * Check for, and remove dupicate states (table rows). |
74 | * @return the number of states removed. |
75 | */ |
76 | int32_t removeDuplicateStates(); |
77 | |
78 | /** Build the safe reverse table from the already-constructed forward table. */ |
79 | void buildSafeReverseTable(UErrorCode &status); |
80 | |
81 | /** Return the runtime size in bytes of the built safe reverse state table. */ |
82 | int32_t getSafeTableSize() const; |
83 | |
84 | /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location. |
85 | */ |
86 | void exportSafeTable(void *where); |
87 | |
88 | |
89 | private: |
90 | void calcNullable(RBBINode *n); |
91 | void calcFirstPos(RBBINode *n); |
92 | void calcLastPos(RBBINode *n); |
93 | void calcFollowPos(RBBINode *n); |
94 | void calcChainedFollowPos(RBBINode *n); |
95 | void bofFixup(); |
96 | void buildStateTable(); |
97 | void flagAcceptingStates(); |
98 | void flagLookAheadStates(); |
99 | void flagTaggedStates(); |
100 | void mergeRuleStatusVals(); |
101 | |
102 | /** |
103 | * Merge redundant state table columns, eliminating character classes with identical behavior. |
104 | * Done after the state tables are generated, just before converting to their run-time format. |
105 | */ |
106 | int32_t mergeColumns(); |
107 | |
108 | void addRuleRootNodes(UVector *dest, RBBINode *node); |
109 | |
110 | /** |
111 | * Find duplicate (redundant) states, beginning at the specified pair, |
112 | * within this state table. This is an iterator-like function, used to |
113 | * identify states (state table rows) that can be eliminated. |
114 | * @param states in/out parameter, specifies where to start looking for duplicates, |
115 | * and returns the first pair of duplicates found, if any. |
116 | * @return true if duplicate states were found, false otherwise. |
117 | */ |
118 | bool findDuplicateState(IntPair *states); |
119 | |
120 | /** Remove a duplicate state. |
121 | * @param duplStates The duplicate states. The first is kept, the second is removed. |
122 | * All references to the second in the state table are retargeted |
123 | * to the first. |
124 | */ |
125 | void removeState(IntPair duplStates); |
126 | |
127 | /** Find the next duplicate state in the safe reverse table. An iterator function. |
128 | * @param states in/out parameter, specifies where to start looking for duplicates, |
129 | * and returns the first pair of duplicates found, if any. |
130 | * @return true if a duplicate pair of states was found. |
131 | */ |
132 | bool findDuplicateSafeState(IntPair *states); |
133 | |
134 | /** Remove a duplicate state from the safe table. |
135 | * @param duplStates The duplicate states. The first is kept, the second is removed. |
136 | * All references to the second in the state table are retargeted |
137 | * to the first. |
138 | */ |
139 | void removeSafeState(IntPair duplStates); |
140 | |
141 | // Set functions for UVector. |
142 | // TODO: make a USet subclass of UVector |
143 | |
144 | void setAdd(UVector *dest, UVector *source); |
145 | UBool setEquals(UVector *a, UVector *b); |
146 | |
147 | void sortedAdd(UVector **dest, int32_t val); |
148 | |
149 | public: |
150 | #ifdef RBBI_DEBUG |
151 | void printSet(UVector *s); |
152 | void printPosSets(RBBINode *n /* = NULL*/); |
153 | void printStates(); |
154 | void printRuleStatusTable(); |
155 | void printReverseTable(); |
156 | #else |
157 | #define printSet(s) |
158 | #define printPosSets(n) |
159 | #define printStates() |
160 | #define printRuleStatusTable() |
161 | #define printReverseTable() |
162 | #endif |
163 | |
164 | private: |
165 | RBBIRuleBuilder *fRB; |
166 | RBBINode *&fTree; // The root node of the parse tree to build a |
167 | // table for. |
168 | UErrorCode *fStatus; |
169 | |
170 | /** State Descriptors, UVector<RBBIStateDescriptor> */ |
171 | UVector *fDStates; // D states (Aho's terminology) |
172 | // Index is state number |
173 | // Contents are RBBIStateDescriptor pointers. |
174 | |
175 | /** Synthesized safe table, UVector of UnicodeString, one string per table row. */ |
176 | UVector *fSafeTable; |
177 | |
178 | |
179 | RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class |
180 | RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class |
181 | }; |
182 | |
183 | // |
184 | // RBBIStateDescriptor - The DFA is constructed as a set of these descriptors, |
185 | // one for each state. |
186 | class RBBIStateDescriptor : public UMemory { |
187 | public: |
188 | UBool fMarked; |
189 | int32_t fAccepting; |
190 | int32_t fLookAhead; |
191 | UVector *fTagVals; |
192 | int32_t fTagsIdx; |
193 | UVector *fPositions; // Set of parse tree positions associated |
194 | // with this state. Unordered (it's a set). |
195 | // UVector contents are RBBINode * |
196 | |
197 | UVector32 *fDtran; // Transitions out of this state. |
198 | // indexed by input character |
199 | // contents is int index of dest state |
200 | // in RBBITableBuilder.fDStates |
201 | |
202 | RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus); |
203 | ~RBBIStateDescriptor(); |
204 | |
205 | private: |
206 | RBBIStateDescriptor(const RBBIStateDescriptor &other); // forbid copying of this class |
207 | RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other); // forbid copying of this class |
208 | }; |
209 | |
210 | |
211 | |
212 | U_NAMESPACE_END |
213 | |
214 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
215 | |
216 | #endif |
217 | |