| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ********************************************************************** | 
|---|
| 5 | * Copyright (C) 1999-2007, International Business Machines Corporation | 
|---|
| 6 | * and others. All Rights Reserved. | 
|---|
| 7 | ********************************************************************** | 
|---|
| 8 | *   Date        Name        Description | 
|---|
| 9 | *   11/17/99    aliu        Creation. | 
|---|
| 10 | ********************************************************************** | 
|---|
| 11 | */ | 
|---|
| 12 | #ifndef RBT_SET_H | 
|---|
| 13 | #define RBT_SET_H | 
|---|
| 14 |  | 
|---|
| 15 | #include "unicode/utypes.h" | 
|---|
| 16 |  | 
|---|
| 17 | #if !UCONFIG_NO_TRANSLITERATION | 
|---|
| 18 |  | 
|---|
| 19 | #include "unicode/uobject.h" | 
|---|
| 20 | #include "unicode/utrans.h" | 
|---|
| 21 | #include "uvector.h" | 
|---|
| 22 |  | 
|---|
| 23 | U_NAMESPACE_BEGIN | 
|---|
| 24 |  | 
|---|
| 25 | class Replaceable; | 
|---|
| 26 | class TransliterationRule; | 
|---|
| 27 | class TransliterationRuleData; | 
|---|
| 28 | class UnicodeFilter; | 
|---|
| 29 | class UnicodeString; | 
|---|
| 30 | class UnicodeSet; | 
|---|
| 31 |  | 
|---|
| 32 | /** | 
|---|
| 33 | * A set of rules for a <code>RuleBasedTransliterator</code>. | 
|---|
| 34 | * @author Alan Liu | 
|---|
| 35 | */ | 
|---|
| 36 | class TransliterationRuleSet : public UMemory { | 
|---|
| 37 | /** | 
|---|
| 38 | * Vector of rules, in the order added.  This is used while the | 
|---|
| 39 | * rule set is getting built.  After that, freeze() reorders and | 
|---|
| 40 | * indexes the rules into rules[].  Any given rule is stored once | 
|---|
| 41 | * in ruleVector, and one or more times in rules[].  ruleVector | 
|---|
| 42 | * owns and deletes the rules. | 
|---|
| 43 | */ | 
|---|
| 44 | UVector* ruleVector; | 
|---|
| 45 |  | 
|---|
| 46 | /** | 
|---|
| 47 | * Sorted and indexed table of rules.  This is created by freeze() | 
|---|
| 48 | * from the rules in ruleVector.  It contains alias pointers to | 
|---|
| 49 | * the rules in ruleVector.  It is zero before freeze() is called | 
|---|
| 50 | * and non-zero thereafter. | 
|---|
| 51 | */ | 
|---|
| 52 | TransliterationRule** rules; | 
|---|
| 53 |  | 
|---|
| 54 | /** | 
|---|
| 55 | * Index table.  For text having a first character c, compute x = c&0xFF. | 
|---|
| 56 | * Now use rules[index[x]..index[x+1]-1].  This index table is created by | 
|---|
| 57 | * freeze().  Before freeze() is called it contains garbage.  It has 257 entries so that index[x+1] is valid for x == 0xFF. | 
|---|
| 58 | */ | 
|---|
| 59 | int32_t index[257]; | 
|---|
| 60 |  | 
|---|
| 61 | /** | 
|---|
| 62 | * Length of the longest preceding context | 
|---|
| 63 | */ | 
|---|
| 64 | int32_t maxContextLength; | 
|---|
| 65 |  | 
|---|
| 66 | public: | 
|---|
| 67 |  | 
|---|
| 68 | /** | 
|---|
| 69 | * Construct a new empty rule set. | 
|---|
| 70 | * @param status    Output parameter filled in with success or failure status. | 
|---|
| 71 | */ | 
|---|
| 72 | TransliterationRuleSet(UErrorCode& status); | 
|---|
| 73 |  | 
|---|
| 74 | /** | 
|---|
| 75 | * Copy constructor. | 
|---|
| 76 | */ | 
|---|
| 77 | TransliterationRuleSet(const TransliterationRuleSet&); | 
|---|
| 78 |  | 
|---|
| 79 | /** | 
|---|
| 80 | * Destructor. | 
|---|
| 81 | */ | 
|---|
| 82 | virtual ~TransliterationRuleSet(); | 
|---|
| 83 |  | 
|---|
| 84 | /** | 
|---|
| 85 | * Change the data object that this rule belongs to.  Used | 
|---|
| 86 | * internally by the TransliterationRuleData copy constructor. | 
|---|
| 87 | * @param data    the new data value to be set. | 
|---|
| 88 | */ | 
|---|
| 89 | void setData(const TransliterationRuleData* data); | 
|---|
| 90 |  | 
|---|
| 91 | /** | 
|---|
| 92 | * Return the maximum context length. | 
|---|
| 93 | * @return the length of the longest preceding context. | 
|---|
| 94 | */ | 
|---|
| 95 | virtual int32_t getMaximumContextLength(void) const; | 
|---|
| 96 |  | 
|---|
| 97 | /** | 
|---|
| 98 | * Add a rule to this set.  Rules are added in order, and order is | 
|---|
| 99 | * significant.  The last call to this method must be followed by | 
|---|
| 100 | * a call to <code>freeze()</code> before the rule set is used. | 
|---|
| 101 | * This method must <em>not</em> be called after freeze() has been | 
|---|
| 102 | * called. | 
|---|
| 103 | * @param adoptedRule the rule to add | 
|---|
| 104 | * @param status      Output parameter filled in with success or failure status. | 
|---|
| 105 | */ | 
|---|
| 106 | virtual void addRule(TransliterationRule* adoptedRule, | 
|---|
| 107 | UErrorCode& status); | 
|---|
| 108 |  | 
|---|
| 109 | /** | 
|---|
| 110 | * Check this for masked rules and index it to optimize performance. | 
|---|
| 111 | * The sequence of operations is: (1) add rules to a set using | 
|---|
| 112 | * <code>addRule()</code>; (2) freeze the set using | 
|---|
| 113 | * <code>freeze()</code>; (3) use the rule set.  If | 
|---|
| 114 | * <code>addRule()</code> is called after calling this method, it | 
|---|
| 115 | * invalidates this object, and this method must be called again. | 
|---|
| 116 | * That is, <code>freeze()</code> may be called multiple times, | 
|---|
| 117 | * although for optimal performance it shouldn't be. | 
|---|
| 118 | * @param parseError A reference to a UParseError that receives information | 
|---|
| 119 | *                   about any errors that occurred. | 
|---|
| 120 | * @param status     Output parameter filled in with success or failure status. | 
|---|
| 121 | */ | 
|---|
| 122 | virtual void freeze(UParseError& parseError, UErrorCode& status); | 
|---|
| 123 |  | 
|---|
| 124 | /** | 
|---|
| 125 | * Transliterate the given text with the given UTransPosition | 
|---|
| 126 | * indices.  Return TRUE if the transliteration should continue | 
|---|
| 127 | * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). | 
|---|
| 128 | * Note that FALSE is only ever returned if isIncremental is TRUE. | 
|---|
| 129 | * @param text the text to be transliterated | 
|---|
| 130 | * @param index the position indices, which will be updated | 
|---|
| 131 | * @param isIncremental if TRUE, assume new text may be inserted | 
|---|
| 132 | * at index.limit, and return FALSE if there is a partial match. | 
|---|
| 133 | * @return TRUE unless a U_PARTIAL_MATCH has been obtained, | 
|---|
| 134 | * indicating that transliteration should stop until more text | 
|---|
| 135 | * arrives. | 
|---|
| 136 | */ | 
|---|
| 137 | UBool transliterate(Replaceable& text, | 
|---|
| 138 | UTransPosition& index, | 
|---|
| 139 | UBool isIncremental); | 
|---|
| 140 |  | 
|---|
| 141 | /** | 
|---|
| 142 | * Create rule strings that represent this rule set. | 
|---|
| 143 | * @param result string to receive the rule strings.  Current | 
|---|
| 144 | * contents will be deleted. | 
|---|
| 145 | * @param escapeUnprintable  if TRUE, escape unprintable characters | 
|---|
| 146 | * @return    A reference to 'result'. | 
|---|
| 147 | */ | 
|---|
| 148 | virtual UnicodeString& toRules(UnicodeString& result, | 
|---|
| 149 | UBool escapeUnprintable) const; | 
|---|
| 150 |  | 
|---|
| 151 | /** | 
|---|
| 152 | * Return the set of all characters that may be modified | 
|---|
| 153 | * (getTarget=false) or emitted (getTarget=true) by this set. | 
|---|
| 154 | */ | 
|---|
| 155 | UnicodeSet& getSourceTargetSet(UnicodeSet& result, | 
|---|
| 156 | UBool getTarget) const; | 
|---|
| 157 |  | 
|---|
| 158 | private: | 
|---|
| 159 |  | 
|---|
| 160 | TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // private: forbids assignment from outside the class (the copy constructor above is public) | 
|---|
| 161 | }; | 
|---|
| 162 |  | 
|---|
| 163 | U_NAMESPACE_END | 
|---|
| 164 |  | 
|---|
| 165 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ | 
|---|
| 166 |  | 
|---|
| 167 | #endif | 
|---|
| 168 |  | 
|---|