1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ********************************************************************** |
5 | * Copyright (C) 1999-2007, International Business Machines Corporation |
6 | * and others. All Rights Reserved. |
7 | ********************************************************************** |
8 | * Date Name Description |
9 | * 11/17/99 aliu Creation. |
10 | ********************************************************************** |
11 | */ |
12 | #ifndef RBT_SET_H |
13 | #define RBT_SET_H |
14 | |
15 | #include "unicode/utypes.h" |
16 | |
17 | #if !UCONFIG_NO_TRANSLITERATION |
18 | |
19 | #include "unicode/uobject.h" |
20 | #include "unicode/utrans.h" |
21 | #include "uvector.h" |
22 | |
23 | U_NAMESPACE_BEGIN |
24 | |
// Forward declarations.  The declarations in this header mention these
// classes only through pointers and references, so their full
// definitions are not required here.
class Replaceable;
class TransliterationRule;
class TransliterationRuleData;
class UnicodeFilter;
class UnicodeSet;
class UnicodeString;
31 | |
32 | /** |
33 | * A set of rules for a <code>RuleBasedTransliterator</code>. |
34 | * @author Alan Liu |
35 | */ |
36 | class TransliterationRuleSet : public UMemory { |
37 | /** |
38 | * Vector of rules, in the order added. This is used while the |
39 | * rule set is getting built. After that, freeze() reorders and |
40 | * indexes the rules into rules[]. Any given rule is stored once |
41 | * in ruleVector, and one or more times in rules[]. ruleVector |
42 | * owns and deletes the rules. |
43 | */ |
44 | UVector* ruleVector; |
45 | |
46 | /** |
47 | * Sorted and indexed table of rules. This is created by freeze() |
48 | * from the rules in ruleVector. It contains alias pointers to |
49 | * the rules in ruleVector. It is zero before freeze() is called |
50 | * and non-zero thereafter. |
51 | */ |
52 | TransliterationRule** rules; |
53 | |
54 | /** |
55 | * Index table. For text having a first character c, compute x = c&0xFF. |
56 | * Now use rules[index[x]..index[x+1]-1]. This index table is created by |
57 | * freeze(). Before freeze() is called it contains garbage. |
58 | */ |
59 | int32_t index[257]; |
60 | |
61 | /** |
62 | * Length of the longest preceding context |
63 | */ |
64 | int32_t maxContextLength; |
65 | |
66 | public: |
67 | |
68 | /** |
69 | * Construct a new empty rule set. |
70 | * @param status Output parameter filled in with success or failure status. |
71 | */ |
72 | TransliterationRuleSet(UErrorCode& status); |
73 | |
74 | /** |
75 | * Copy constructor. |
76 | */ |
77 | TransliterationRuleSet(const TransliterationRuleSet&); |
78 | |
79 | /** |
80 | * Destructor. |
81 | */ |
82 | virtual ~TransliterationRuleSet(); |
83 | |
84 | /** |
85 | * Change the data object that this rule belongs to. Used |
86 | * internally by the TransliterationRuleData copy constructor. |
87 | * @param data the new data value to be set. |
88 | */ |
89 | void setData(const TransliterationRuleData* data); |
90 | |
91 | /** |
92 | * Return the maximum context length. |
93 | * @return the length of the longest preceding context. |
94 | */ |
95 | virtual int32_t getMaximumContextLength(void) const; |
96 | |
97 | /** |
98 | * Add a rule to this set. Rules are added in order, and order is |
99 | * significant. The last call to this method must be followed by |
100 | * a call to <code>freeze()</code> before the rule set is used. |
101 | * This method must <em>not</em> be called after freeze() has been |
102 | * called. |
103 | * |
104 | * @param adoptedRule the rule to add |
105 | */ |
106 | virtual void addRule(TransliterationRule* adoptedRule, |
107 | UErrorCode& status); |
108 | |
109 | /** |
110 | * Check this for masked rules and index it to optimize performance. |
111 | * The sequence of operations is: (1) add rules to a set using |
112 | * <code>addRule()</code>; (2) freeze the set using |
113 | * <code>freeze()</code>; (3) use the rule set. If |
114 | * <code>addRule()</code> is called after calling this method, it |
115 | * invalidates this object, and this method must be called again. |
116 | * That is, <code>freeze()</code> may be called multiple times, |
117 | * although for optimal performance it shouldn't be. |
118 | * @param parseError A pointer to UParseError to receive information about errors |
119 | * occurred. |
120 | * @param status Output parameter filled in with success or failure status. |
121 | */ |
122 | virtual void freeze(UParseError& parseError, UErrorCode& status); |
123 | |
124 | /** |
125 | * Transliterate the given text with the given UTransPosition |
126 | * indices. Return TRUE if the transliteration should continue |
127 | * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). |
128 | * Note that FALSE is only ever returned if isIncremental is TRUE. |
129 | * @param text the text to be transliterated |
130 | * @param index the position indices, which will be updated |
131 | * @param isIncremental if TRUE, assume new text may be inserted |
132 | * at index.limit, and return FALSE if thre is a partial match. |
133 | * @return TRUE unless a U_PARTIAL_MATCH has been obtained, |
134 | * indicating that transliteration should stop until more text |
135 | * arrives. |
136 | */ |
137 | UBool transliterate(Replaceable& text, |
138 | UTransPosition& index, |
139 | UBool isIncremental); |
140 | |
141 | /** |
142 | * Create rule strings that represents this rule set. |
143 | * @param result string to receive the rule strings. Current |
144 | * contents will be deleted. |
145 | * @param escapeUnprintable True, will escape the unprintable characters |
146 | * @return A reference to 'result'. |
147 | */ |
148 | virtual UnicodeString& toRules(UnicodeString& result, |
149 | UBool escapeUnprintable) const; |
150 | |
151 | /** |
152 | * Return the set of all characters that may be modified |
153 | * (getTarget=false) or emitted (getTarget=true) by this set. |
154 | */ |
155 | UnicodeSet& getSourceTargetSet(UnicodeSet& result, |
156 | UBool getTarget) const; |
157 | |
158 | private: |
159 | |
160 | TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class |
161 | }; |
162 | |
163 | U_NAMESPACE_END |
164 | |
165 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
166 | |
167 | #endif |
168 | |