1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3//
4// file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class
5//
6/*
7***************************************************************************
8* Copyright (C) 2002-2014 International Business Machines Corporation
9* and others. All rights reserved.
10***************************************************************************
11*/
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_BREAK_ITERATION
16
17#include "unicode/unistr.h"
18#include "unicode/uniset.h"
19#include "unicode/uchar.h"
20#include "unicode/parsepos.h"
21
22#include "cstr.h"
23#include "rbbinode.h"
24#include "rbbirb.h"
25#include "umutex.h"
26
27
28//
29// RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
30// when the hash table is deleted.
31//
32U_CDECL_BEGIN
33static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
34 icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;
35 delete px;
36}
37U_CDECL_END
38
39
40
41U_NAMESPACE_BEGIN
42
43RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
44 :fRules(rules), fRuleScanner(rs), ffffString(char16_t(0xffff))
45{
46 fHashTable = nullptr;
47 fCachedSetLookup = nullptr;
48
49 fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, nullptr, &status);
50 // uhash_open checks status
51 if (U_FAILURE(status)) {
52 return;
53 }
54 uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
55}
56
57
58
59RBBISymbolTable::~RBBISymbolTable()
60{
61 uhash_close(fHashTable);
62}
63
64
65//
66// RBBISymbolTable::lookup This function from the abstract symbol table interface
67// looks up a variable name and returns a UnicodeString
68// containing the substitution text.
69//
70// The variable name does NOT include the leading $.
71//
72const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const
73{
74 RBBISymbolTableEntry *el;
75 RBBINode *varRefNode;
76 RBBINode *exprNode;
77 RBBINode *usetNode;
78 const UnicodeString *retString;
79 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
80
81 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
82 if (el == nullptr) {
83 return nullptr;
84 }
85
86 varRefNode = el->val;
87 exprNode = varRefNode->fLeftChild; // Root node of expression for variable
88 if (exprNode->fType == RBBINode::setRef) {
89 // The $variable refers to a single UnicodeSet
90 // return the ffffString, which will subsequently be interpreted as a
91 // stand-in character for the set by RBBISymbolTable::lookupMatcher()
92 usetNode = exprNode->fLeftChild;
93 This->fCachedSetLookup = usetNode->fInputSet;
94 retString = &ffffString;
95 }
96 else
97 {
98 // The variable refers to something other than just a set.
99 // return the original source string for the expression
100 retString = &exprNode->fText;
101 This->fCachedSetLookup = nullptr;
102 }
103 return retString;
104}
105
106
107
108//
109// RBBISymbolTable::lookupMatcher This function from the abstract symbol table
110// interface maps a single stand-in character to a
111// pointer to a Unicode Set. The Unicode Set code uses this
112// mechanism to get all references to the same $variable
113// name to refer to a single common Unicode Set instance.
114//
115// This implementation cheats a little, and does not maintain a map of stand-in chars
116// to sets. Instead, it takes advantage of the fact that the UnicodeSet
117// constructor will always call this function right after calling lookup(),
118// and we just need to remember what set to return between these two calls.
119const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
120{
121 UnicodeSet *retVal = nullptr;
122 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
123 if (ch == 0xffff) {
124 retVal = fCachedSetLookup;
125 This->fCachedSetLookup = 0;
126 }
127 return retVal;
128}
129
130//
131// RBBISymbolTable::parseReference This function from the abstract symbol table interface
132// looks for a $variable name in the source text.
133// It does not look it up, only scans for it.
134// It is used by the UnicodeSet parser.
135//
136// This implementation is lifted pretty much verbatim
137// from the rules based transliterator implementation.
138// I didn't see an obvious way of sharing it.
139//
140UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text,
141 ParsePosition& pos, int32_t limit) const
142{
143 int32_t start = pos.getIndex();
144 int32_t i = start;
145 UnicodeString result;
146 while (i < limit) {
147 char16_t c = text.charAt(i);
148 if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
149 break;
150 }
151 ++i;
152 }
153 if (i == start) { // No valid name chars
154 return result; // Indicate failure with empty string
155 }
156 pos.setIndex(i);
157 text.extractBetween(start, i, result);
158 return result;
159}
160
161
162
163//
164// RBBISymbolTable::lookupNode Given a key (a variable name), return the
165// corresponding RBBI Node. If there is no entry
166// in the table for this name, return nullptr.
167//
168RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
169
170 RBBINode *retNode = nullptr;
171 RBBISymbolTableEntry *el;
172
173 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
174 if (el != nullptr) {
175 retNode = el->val;
176 }
177 return retNode;
178}
179
180
181//
182// RBBISymbolTable::addEntry Add a new entry to the symbol table.
183// Indicate an error if the name already exists -
184// this will only occur in the case of duplicate
185// variable assignments.
186//
187void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
188 RBBISymbolTableEntry *e;
189 /* test for buffer overflows */
190 if (U_FAILURE(err)) {
191 return;
192 }
193 e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
194 if (e != nullptr) {
195 err = U_BRK_VARIABLE_REDFINITION;
196 return;
197 }
198
199 e = new RBBISymbolTableEntry;
200 if (e == nullptr) {
201 err = U_MEMORY_ALLOCATION_ERROR;
202 return;
203 }
204 e->key = key;
205 e->val = val;
206 uhash_put( fHashTable, &e->key, e, &err);
207}
208
209
210RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(nullptr) {}
211
212RBBISymbolTableEntry::~RBBISymbolTableEntry() {
213 // The "val" of a symbol table entry is a variable reference node.
214 // The l. child of the val is the rhs expression from the assignment.
215 // Unlike other node types, children of variable reference nodes are not
216 // automatically recursively deleted. We do it manually here.
217 delete val->fLeftChild;
218 val->fLeftChild = nullptr;
219
220 delete val;
221
222 // Note: the key UnicodeString is destructed by virtue of being in the object by value.
223}
224
225
226//
// RBBISymbolTable::rbbiSymtablePrint Debugging function, dump out the symbol table contents.
228//
229#ifdef RBBI_DEBUG
230void RBBISymbolTable::rbbiSymtablePrint() const {
231 RBBIDebugPrintf("Variable Definitions Symbol Table\n"
232 "Name Node serial String Val\n"
233 "-------------------------------------------------------------------\n");
234
235 int32_t pos = UHASH_FIRST;
236 const UHashElement *e = nullptr;
237 for (;;) {
238 e = uhash_nextElement(fHashTable, &pos);
239 if (e == nullptr ) {
240 break;
241 }
242 RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
243
244 RBBIDebugPrintf("%-19s %8p %7d ", CStr(s->key)(), (void *)s->val, s->val->fSerialNum);
245 RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)());
246 }
247
248 RBBIDebugPrintf("\nParsed Variable Definitions\n");
249 pos = -1;
250 for (;;) {
251 e = uhash_nextElement(fHashTable, &pos);
252 if (e == nullptr ) {
253 break;
254 }
255 RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
256 RBBIDebugPrintf("%s\n", CStr(s->key)());
257 RBBINode::printTree(s->val, true);
258 RBBINode::printTree(s->val->fLeftChild, false);
259 RBBIDebugPrintf("\n");
260 }
261}
262#endif
263
264
265
266
267
268U_NAMESPACE_END
269
270#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
271