1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | #include "unicode/utypes.h" |
5 | |
6 | #if !UCONFIG_NO_FORMATTING |
7 | |
8 | #include "formatted_string_builder.h" |
9 | #include "unicode/ustring.h" |
10 | #include "unicode/utf16.h" |
11 | #include "unicode/unum.h" // for UNumberFormatFields literals |
12 | |
13 | namespace { |
14 | |
15 | // A version of uprv_memcpy that checks for length 0. |
16 | // By default, uprv_memcpy requires a length of at least 1. |
17 | inline void uprv_memcpy2(void* dest, const void* src, size_t len) { |
18 | if (len > 0) { |
19 | uprv_memcpy(dest, src, len); |
20 | } |
21 | } |
22 | |
23 | // A version of uprv_memmove that checks for length 0. |
24 | // By default, uprv_memmove requires a length of at least 1. |
25 | inline void uprv_memmove2(void* dest, const void* src, size_t len) { |
26 | if (len > 0) { |
27 | uprv_memmove(dest, src, len); |
28 | } |
29 | } |
30 | |
31 | } // namespace |
32 | |
33 | |
34 | U_NAMESPACE_BEGIN |
35 | |
36 | FormattedStringBuilder::FormattedStringBuilder() { |
37 | #if U_DEBUG |
38 | // Initializing the memory to non-zero helps catch some bugs that involve |
39 | // reading from an improperly terminated string. |
40 | for (int32_t i=0; i<getCapacity(); i++) { |
41 | getCharPtr()[i] = 1; |
42 | } |
43 | #endif |
44 | } |
45 | |
46 | FormattedStringBuilder::~FormattedStringBuilder() { |
47 | if (fUsingHeap) { |
48 | uprv_free(fChars.heap.ptr); |
49 | uprv_free(fFields.heap.ptr); |
50 | } |
51 | } |
52 | |
53 | FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) { |
54 | *this = other; |
55 | } |
56 | |
57 | FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) { |
58 | // Check for self-assignment |
59 | if (this == &other) { |
60 | return *this; |
61 | } |
62 | |
63 | // Continue with deallocation and copying |
64 | if (fUsingHeap) { |
65 | uprv_free(fChars.heap.ptr); |
66 | uprv_free(fFields.heap.ptr); |
67 | fUsingHeap = false; |
68 | } |
69 | |
70 | int32_t capacity = other.getCapacity(); |
71 | if (capacity > DEFAULT_CAPACITY) { |
72 | // FIXME: uprv_malloc |
73 | // C++ note: malloc appears in two places: here and in prepareForInsertHelper. |
74 | auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity)); |
75 | auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity)); |
76 | if (newChars == nullptr || newFields == nullptr) { |
77 | // UErrorCode is not available; fail silently. |
78 | uprv_free(newChars); |
79 | uprv_free(newFields); |
80 | *this = FormattedStringBuilder(); // can't fail |
81 | return *this; |
82 | } |
83 | |
84 | fUsingHeap = true; |
85 | fChars.heap.capacity = capacity; |
86 | fChars.heap.ptr = newChars; |
87 | fFields.heap.capacity = capacity; |
88 | fFields.heap.ptr = newFields; |
89 | } |
90 | |
91 | uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity); |
92 | uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity); |
93 | |
94 | fZero = other.fZero; |
95 | fLength = other.fLength; |
96 | return *this; |
97 | } |
98 | |
99 | int32_t FormattedStringBuilder::length() const { |
100 | return fLength; |
101 | } |
102 | |
103 | int32_t FormattedStringBuilder::codePointCount() const { |
104 | return u_countChar32(getCharPtr() + fZero, fLength); |
105 | } |
106 | |
107 | UChar32 FormattedStringBuilder::getFirstCodePoint() const { |
108 | if (fLength == 0) { |
109 | return -1; |
110 | } |
111 | UChar32 cp; |
112 | U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp); |
113 | return cp; |
114 | } |
115 | |
116 | UChar32 FormattedStringBuilder::getLastCodePoint() const { |
117 | if (fLength == 0) { |
118 | return -1; |
119 | } |
120 | int32_t offset = fLength; |
121 | U16_BACK_1(getCharPtr() + fZero, 0, offset); |
122 | UChar32 cp; |
123 | U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); |
124 | return cp; |
125 | } |
126 | |
127 | UChar32 FormattedStringBuilder::codePointAt(int32_t index) const { |
128 | UChar32 cp; |
129 | U16_GET(getCharPtr() + fZero, 0, index, fLength, cp); |
130 | return cp; |
131 | } |
132 | |
133 | UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const { |
134 | int32_t offset = index; |
135 | U16_BACK_1(getCharPtr() + fZero, 0, offset); |
136 | UChar32 cp; |
137 | U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); |
138 | return cp; |
139 | } |
140 | |
141 | FormattedStringBuilder &FormattedStringBuilder::clear() { |
142 | // TODO: Reset the heap here? |
143 | fZero = getCapacity() / 2; |
144 | fLength = 0; |
145 | return *this; |
146 | } |
147 | |
148 | int32_t |
149 | FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) { |
150 | int32_t count = U16_LENGTH(codePoint); |
151 | int32_t position = prepareForInsert(index, count, status); |
152 | if (U_FAILURE(status)) { |
153 | return count; |
154 | } |
155 | if (count == 1) { |
156 | getCharPtr()[position] = (char16_t) codePoint; |
157 | getFieldPtr()[position] = field; |
158 | } else { |
159 | getCharPtr()[position] = U16_LEAD(codePoint); |
160 | getCharPtr()[position + 1] = U16_TRAIL(codePoint); |
161 | getFieldPtr()[position] = getFieldPtr()[position + 1] = field; |
162 | } |
163 | return count; |
164 | } |
165 | |
166 | int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field, |
167 | UErrorCode &status) { |
168 | if (unistr.length() == 0) { |
169 | // Nothing to insert. |
170 | return 0; |
171 | } else if (unistr.length() == 1) { |
172 | // Fast path: insert using insertCodePoint. |
173 | return insertCodePoint(index, unistr.charAt(0), field, status); |
174 | } else { |
175 | return insert(index, unistr, 0, unistr.length(), field, status); |
176 | } |
177 | } |
178 | |
179 | int32_t |
180 | FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, |
181 | Field field, UErrorCode &status) { |
182 | int32_t count = end - start; |
183 | int32_t position = prepareForInsert(index, count, status); |
184 | if (U_FAILURE(status)) { |
185 | return count; |
186 | } |
187 | for (int32_t i = 0; i < count; i++) { |
188 | getCharPtr()[position + i] = unistr.charAt(start + i); |
189 | getFieldPtr()[position + i] = field; |
190 | } |
191 | return count; |
192 | } |
193 | |
194 | int32_t |
195 | FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, |
196 | int32_t startOther, int32_t endOther, Field field, UErrorCode& status) { |
197 | int32_t thisLength = endThis - startThis; |
198 | int32_t otherLength = endOther - startOther; |
199 | int32_t count = otherLength - thisLength; |
200 | int32_t position; |
201 | if (count > 0) { |
202 | // Overall, chars need to be added. |
203 | position = prepareForInsert(startThis, count, status); |
204 | } else { |
205 | // Overall, chars need to be removed or kept the same. |
206 | position = remove(startThis, -count); |
207 | } |
208 | if (U_FAILURE(status)) { |
209 | return count; |
210 | } |
211 | for (int32_t i = 0; i < otherLength; i++) { |
212 | getCharPtr()[position + i] = unistr.charAt(startOther + i); |
213 | getFieldPtr()[position + i] = field; |
214 | } |
215 | return count; |
216 | } |
217 | |
218 | int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) { |
219 | return insert(fLength, other, status); |
220 | } |
221 | |
222 | int32_t |
223 | FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) { |
224 | if (this == &other) { |
225 | status = U_ILLEGAL_ARGUMENT_ERROR; |
226 | return 0; |
227 | } |
228 | int32_t count = other.fLength; |
229 | if (count == 0) { |
230 | // Nothing to insert. |
231 | return 0; |
232 | } |
233 | int32_t position = prepareForInsert(index, count, status); |
234 | if (U_FAILURE(status)) { |
235 | return count; |
236 | } |
237 | for (int32_t i = 0; i < count; i++) { |
238 | getCharPtr()[position + i] = other.charAt(i); |
239 | getFieldPtr()[position + i] = other.fieldAt(i); |
240 | } |
241 | return count; |
242 | } |
243 | |
244 | void FormattedStringBuilder::writeTerminator(UErrorCode& status) { |
245 | int32_t position = prepareForInsert(fLength, 1, status); |
246 | if (U_FAILURE(status)) { |
247 | return; |
248 | } |
249 | getCharPtr()[position] = 0; |
250 | getFieldPtr()[position] = kUndefinedField; |
251 | fLength--; |
252 | } |
253 | |
254 | int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) { |
255 | U_ASSERT(index >= 0); |
256 | U_ASSERT(index <= fLength); |
257 | U_ASSERT(count >= 0); |
258 | if (index == 0 && fZero - count >= 0) { |
259 | // Append to start |
260 | fZero -= count; |
261 | fLength += count; |
262 | return fZero; |
263 | } else if (index == fLength && fZero + fLength + count < getCapacity()) { |
264 | // Append to end |
265 | fLength += count; |
266 | return fZero + fLength - count; |
267 | } else { |
268 | // Move chars around and/or allocate more space |
269 | return prepareForInsertHelper(index, count, status); |
270 | } |
271 | } |
272 | |
273 | int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) { |
274 | int32_t oldCapacity = getCapacity(); |
275 | int32_t oldZero = fZero; |
276 | char16_t *oldChars = getCharPtr(); |
277 | Field *oldFields = getFieldPtr(); |
278 | if (fLength + count > oldCapacity) { |
279 | int32_t newCapacity = (fLength + count) * 2; |
280 | int32_t newZero = newCapacity / 2 - (fLength + count) / 2; |
281 | |
282 | // C++ note: malloc appears in two places: here and in the assignment operator. |
283 | auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity)); |
284 | auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity)); |
285 | if (newChars == nullptr || newFields == nullptr) { |
286 | uprv_free(newChars); |
287 | uprv_free(newFields); |
288 | status = U_MEMORY_ALLOCATION_ERROR; |
289 | return -1; |
290 | } |
291 | |
292 | // First copy the prefix and then the suffix, leaving room for the new chars that the |
293 | // caller wants to insert. |
294 | // C++ note: memcpy is OK because the src and dest do not overlap. |
295 | uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index); |
296 | uprv_memcpy2(newChars + newZero + index + count, |
297 | oldChars + oldZero + index, |
298 | sizeof(char16_t) * (fLength - index)); |
299 | uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index); |
300 | uprv_memcpy2(newFields + newZero + index + count, |
301 | oldFields + oldZero + index, |
302 | sizeof(Field) * (fLength - index)); |
303 | |
304 | if (fUsingHeap) { |
305 | uprv_free(oldChars); |
306 | uprv_free(oldFields); |
307 | } |
308 | fUsingHeap = true; |
309 | fChars.heap.ptr = newChars; |
310 | fChars.heap.capacity = newCapacity; |
311 | fFields.heap.ptr = newFields; |
312 | fFields.heap.capacity = newCapacity; |
313 | fZero = newZero; |
314 | fLength += count; |
315 | } else { |
316 | int32_t newZero = oldCapacity / 2 - (fLength + count) / 2; |
317 | |
318 | // C++ note: memmove is required because src and dest may overlap. |
319 | // First copy the entire string to the location of the prefix, and then move the suffix |
320 | // to make room for the new chars that the caller wants to insert. |
321 | uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength); |
322 | uprv_memmove2(oldChars + newZero + index + count, |
323 | oldChars + newZero + index, |
324 | sizeof(char16_t) * (fLength - index)); |
325 | uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength); |
326 | uprv_memmove2(oldFields + newZero + index + count, |
327 | oldFields + newZero + index, |
328 | sizeof(Field) * (fLength - index)); |
329 | |
330 | fZero = newZero; |
331 | fLength += count; |
332 | } |
333 | return fZero + index; |
334 | } |
335 | |
336 | int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) { |
337 | // TODO: Reset the heap here? (If the string after removal can fit on stack?) |
338 | int32_t position = index + fZero; |
339 | uprv_memmove2(getCharPtr() + position, |
340 | getCharPtr() + position + count, |
341 | sizeof(char16_t) * (fLength - index - count)); |
342 | uprv_memmove2(getFieldPtr() + position, |
343 | getFieldPtr() + position + count, |
344 | sizeof(Field) * (fLength - index - count)); |
345 | fLength -= count; |
346 | return position; |
347 | } |
348 | |
349 | UnicodeString FormattedStringBuilder::toUnicodeString() const { |
350 | return UnicodeString(getCharPtr() + fZero, fLength); |
351 | } |
352 | |
353 | const UnicodeString FormattedStringBuilder::toTempUnicodeString() const { |
354 | // Readonly-alias constructor: |
355 | return UnicodeString(FALSE, getCharPtr() + fZero, fLength); |
356 | } |
357 | |
358 | UnicodeString FormattedStringBuilder::toDebugString() const { |
359 | UnicodeString sb; |
360 | sb.append(u"<FormattedStringBuilder [" , -1); |
361 | sb.append(toUnicodeString()); |
362 | sb.append(u"] [" , -1); |
363 | for (int i = 0; i < fLength; i++) { |
364 | if (fieldAt(i) == kUndefinedField) { |
365 | sb.append(u'n'); |
366 | } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) { |
367 | char16_t c; |
368 | switch (fieldAt(i).getField()) { |
369 | case UNUM_SIGN_FIELD: |
370 | c = u'-'; |
371 | break; |
372 | case UNUM_INTEGER_FIELD: |
373 | c = u'i'; |
374 | break; |
375 | case UNUM_FRACTION_FIELD: |
376 | c = u'f'; |
377 | break; |
378 | case UNUM_EXPONENT_FIELD: |
379 | c = u'e'; |
380 | break; |
381 | case UNUM_EXPONENT_SIGN_FIELD: |
382 | c = u'+'; |
383 | break; |
384 | case UNUM_EXPONENT_SYMBOL_FIELD: |
385 | c = u'E'; |
386 | break; |
387 | case UNUM_DECIMAL_SEPARATOR_FIELD: |
388 | c = u'.'; |
389 | break; |
390 | case UNUM_GROUPING_SEPARATOR_FIELD: |
391 | c = u','; |
392 | break; |
393 | case UNUM_PERCENT_FIELD: |
394 | c = u'%'; |
395 | break; |
396 | case UNUM_PERMILL_FIELD: |
397 | c = u'‰'; |
398 | break; |
399 | case UNUM_CURRENCY_FIELD: |
400 | c = u'$'; |
401 | break; |
402 | default: |
403 | c = u'0' + fieldAt(i).getField(); |
404 | break; |
405 | } |
406 | sb.append(c); |
407 | } else { |
408 | sb.append(u'0' + fieldAt(i).getCategory()); |
409 | } |
410 | } |
411 | sb.append(u"]>" , -1); |
412 | return sb; |
413 | } |
414 | |
415 | const char16_t *FormattedStringBuilder::chars() const { |
416 | return getCharPtr() + fZero; |
417 | } |
418 | |
419 | bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const { |
420 | if (fLength != other.fLength) { |
421 | return false; |
422 | } |
423 | for (int32_t i = 0; i < fLength; i++) { |
424 | if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) { |
425 | return false; |
426 | } |
427 | } |
428 | return true; |
429 | } |
430 | |
431 | bool FormattedStringBuilder::containsField(Field field) const { |
432 | for (int32_t i = 0; i < fLength; i++) { |
433 | if (field == fieldAt(i)) { |
434 | return true; |
435 | } |
436 | } |
437 | return false; |
438 | } |
439 | |
440 | U_NAMESPACE_END |
441 | |
442 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
443 | |