1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7
8#include "formatted_string_builder.h"
9#include "unicode/ustring.h"
10#include "unicode/utf16.h"
11#include "unicode/unum.h" // for UNumberFormatFields literals
12
13namespace {
14
15// A version of uprv_memcpy that checks for length 0.
16// By default, uprv_memcpy requires a length of at least 1.
17inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
18 if (len > 0) {
19 uprv_memcpy(dest, src, len);
20 }
21}
22
23// A version of uprv_memmove that checks for length 0.
24// By default, uprv_memmove requires a length of at least 1.
25inline void uprv_memmove2(void* dest, const void* src, size_t len) {
26 if (len > 0) {
27 uprv_memmove(dest, src, len);
28 }
29}
30
31} // namespace
32
33
34U_NAMESPACE_BEGIN
35
36FormattedStringBuilder::FormattedStringBuilder() {
37#if U_DEBUG
38 // Initializing the memory to non-zero helps catch some bugs that involve
39 // reading from an improperly terminated string.
40 for (int32_t i=0; i<getCapacity(); i++) {
41 getCharPtr()[i] = 1;
42 }
43#endif
44}
45
46FormattedStringBuilder::~FormattedStringBuilder() {
47 if (fUsingHeap) {
48 uprv_free(fChars.heap.ptr);
49 uprv_free(fFields.heap.ptr);
50 }
51}
52
53FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
54 *this = other;
55}
56
57FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
58 // Check for self-assignment
59 if (this == &other) {
60 return *this;
61 }
62
63 // Continue with deallocation and copying
64 if (fUsingHeap) {
65 uprv_free(fChars.heap.ptr);
66 uprv_free(fFields.heap.ptr);
67 fUsingHeap = false;
68 }
69
70 int32_t capacity = other.getCapacity();
71 if (capacity > DEFAULT_CAPACITY) {
72 // FIXME: uprv_malloc
73 // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
74 auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity));
75 auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity));
76 if (newChars == nullptr || newFields == nullptr) {
77 // UErrorCode is not available; fail silently.
78 uprv_free(newChars);
79 uprv_free(newFields);
80 *this = FormattedStringBuilder(); // can't fail
81 return *this;
82 }
83
84 fUsingHeap = true;
85 fChars.heap.capacity = capacity;
86 fChars.heap.ptr = newChars;
87 fFields.heap.capacity = capacity;
88 fFields.heap.ptr = newFields;
89 }
90
91 uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity);
92 uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity);
93
94 fZero = other.fZero;
95 fLength = other.fLength;
96 return *this;
97}
98
99int32_t FormattedStringBuilder::length() const {
100 return fLength;
101}
102
103int32_t FormattedStringBuilder::codePointCount() const {
104 return u_countChar32(getCharPtr() + fZero, fLength);
105}
106
107UChar32 FormattedStringBuilder::getFirstCodePoint() const {
108 if (fLength == 0) {
109 return -1;
110 }
111 UChar32 cp;
112 U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
113 return cp;
114}
115
116UChar32 FormattedStringBuilder::getLastCodePoint() const {
117 if (fLength == 0) {
118 return -1;
119 }
120 int32_t offset = fLength;
121 U16_BACK_1(getCharPtr() + fZero, 0, offset);
122 UChar32 cp;
123 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
124 return cp;
125}
126
127UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
128 UChar32 cp;
129 U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
130 return cp;
131}
132
133UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
134 int32_t offset = index;
135 U16_BACK_1(getCharPtr() + fZero, 0, offset);
136 UChar32 cp;
137 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
138 return cp;
139}
140
141FormattedStringBuilder &FormattedStringBuilder::clear() {
142 // TODO: Reset the heap here?
143 fZero = getCapacity() / 2;
144 fLength = 0;
145 return *this;
146}
147
148int32_t
149FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
150 int32_t count = U16_LENGTH(codePoint);
151 int32_t position = prepareForInsert(index, count, status);
152 if (U_FAILURE(status)) {
153 return count;
154 }
155 if (count == 1) {
156 getCharPtr()[position] = (char16_t) codePoint;
157 getFieldPtr()[position] = field;
158 } else {
159 getCharPtr()[position] = U16_LEAD(codePoint);
160 getCharPtr()[position + 1] = U16_TRAIL(codePoint);
161 getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
162 }
163 return count;
164}
165
166int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
167 UErrorCode &status) {
168 if (unistr.length() == 0) {
169 // Nothing to insert.
170 return 0;
171 } else if (unistr.length() == 1) {
172 // Fast path: insert using insertCodePoint.
173 return insertCodePoint(index, unistr.charAt(0), field, status);
174 } else {
175 return insert(index, unistr, 0, unistr.length(), field, status);
176 }
177}
178
179int32_t
180FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
181 Field field, UErrorCode &status) {
182 int32_t count = end - start;
183 int32_t position = prepareForInsert(index, count, status);
184 if (U_FAILURE(status)) {
185 return count;
186 }
187 for (int32_t i = 0; i < count; i++) {
188 getCharPtr()[position + i] = unistr.charAt(start + i);
189 getFieldPtr()[position + i] = field;
190 }
191 return count;
192}
193
194int32_t
195FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
196 int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
197 int32_t thisLength = endThis - startThis;
198 int32_t otherLength = endOther - startOther;
199 int32_t count = otherLength - thisLength;
200 int32_t position;
201 if (count > 0) {
202 // Overall, chars need to be added.
203 position = prepareForInsert(startThis, count, status);
204 } else {
205 // Overall, chars need to be removed or kept the same.
206 position = remove(startThis, -count);
207 }
208 if (U_FAILURE(status)) {
209 return count;
210 }
211 for (int32_t i = 0; i < otherLength; i++) {
212 getCharPtr()[position + i] = unistr.charAt(startOther + i);
213 getFieldPtr()[position + i] = field;
214 }
215 return count;
216}
217
218int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
219 return insert(fLength, other, status);
220}
221
222int32_t
223FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
224 if (this == &other) {
225 status = U_ILLEGAL_ARGUMENT_ERROR;
226 return 0;
227 }
228 int32_t count = other.fLength;
229 if (count == 0) {
230 // Nothing to insert.
231 return 0;
232 }
233 int32_t position = prepareForInsert(index, count, status);
234 if (U_FAILURE(status)) {
235 return count;
236 }
237 for (int32_t i = 0; i < count; i++) {
238 getCharPtr()[position + i] = other.charAt(i);
239 getFieldPtr()[position + i] = other.fieldAt(i);
240 }
241 return count;
242}
243
244void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
245 int32_t position = prepareForInsert(fLength, 1, status);
246 if (U_FAILURE(status)) {
247 return;
248 }
249 getCharPtr()[position] = 0;
250 getFieldPtr()[position] = kUndefinedField;
251 fLength--;
252}
253
254int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
255 U_ASSERT(index >= 0);
256 U_ASSERT(index <= fLength);
257 U_ASSERT(count >= 0);
258 if (index == 0 && fZero - count >= 0) {
259 // Append to start
260 fZero -= count;
261 fLength += count;
262 return fZero;
263 } else if (index == fLength && fZero + fLength + count < getCapacity()) {
264 // Append to end
265 fLength += count;
266 return fZero + fLength - count;
267 } else {
268 // Move chars around and/or allocate more space
269 return prepareForInsertHelper(index, count, status);
270 }
271}
272
273int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
274 int32_t oldCapacity = getCapacity();
275 int32_t oldZero = fZero;
276 char16_t *oldChars = getCharPtr();
277 Field *oldFields = getFieldPtr();
278 if (fLength + count > oldCapacity) {
279 int32_t newCapacity = (fLength + count) * 2;
280 int32_t newZero = newCapacity / 2 - (fLength + count) / 2;
281
282 // C++ note: malloc appears in two places: here and in the assignment operator.
283 auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity));
284 auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity));
285 if (newChars == nullptr || newFields == nullptr) {
286 uprv_free(newChars);
287 uprv_free(newFields);
288 status = U_MEMORY_ALLOCATION_ERROR;
289 return -1;
290 }
291
292 // First copy the prefix and then the suffix, leaving room for the new chars that the
293 // caller wants to insert.
294 // C++ note: memcpy is OK because the src and dest do not overlap.
295 uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
296 uprv_memcpy2(newChars + newZero + index + count,
297 oldChars + oldZero + index,
298 sizeof(char16_t) * (fLength - index));
299 uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
300 uprv_memcpy2(newFields + newZero + index + count,
301 oldFields + oldZero + index,
302 sizeof(Field) * (fLength - index));
303
304 if (fUsingHeap) {
305 uprv_free(oldChars);
306 uprv_free(oldFields);
307 }
308 fUsingHeap = true;
309 fChars.heap.ptr = newChars;
310 fChars.heap.capacity = newCapacity;
311 fFields.heap.ptr = newFields;
312 fFields.heap.capacity = newCapacity;
313 fZero = newZero;
314 fLength += count;
315 } else {
316 int32_t newZero = oldCapacity / 2 - (fLength + count) / 2;
317
318 // C++ note: memmove is required because src and dest may overlap.
319 // First copy the entire string to the location of the prefix, and then move the suffix
320 // to make room for the new chars that the caller wants to insert.
321 uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
322 uprv_memmove2(oldChars + newZero + index + count,
323 oldChars + newZero + index,
324 sizeof(char16_t) * (fLength - index));
325 uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
326 uprv_memmove2(oldFields + newZero + index + count,
327 oldFields + newZero + index,
328 sizeof(Field) * (fLength - index));
329
330 fZero = newZero;
331 fLength += count;
332 }
333 return fZero + index;
334}
335
336int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
337 // TODO: Reset the heap here? (If the string after removal can fit on stack?)
338 int32_t position = index + fZero;
339 uprv_memmove2(getCharPtr() + position,
340 getCharPtr() + position + count,
341 sizeof(char16_t) * (fLength - index - count));
342 uprv_memmove2(getFieldPtr() + position,
343 getFieldPtr() + position + count,
344 sizeof(Field) * (fLength - index - count));
345 fLength -= count;
346 return position;
347}
348
349UnicodeString FormattedStringBuilder::toUnicodeString() const {
350 return UnicodeString(getCharPtr() + fZero, fLength);
351}
352
353const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
354 // Readonly-alias constructor:
355 return UnicodeString(FALSE, getCharPtr() + fZero, fLength);
356}
357
358UnicodeString FormattedStringBuilder::toDebugString() const {
359 UnicodeString sb;
360 sb.append(u"<FormattedStringBuilder [", -1);
361 sb.append(toUnicodeString());
362 sb.append(u"] [", -1);
363 for (int i = 0; i < fLength; i++) {
364 if (fieldAt(i) == kUndefinedField) {
365 sb.append(u'n');
366 } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) {
367 char16_t c;
368 switch (fieldAt(i).getField()) {
369 case UNUM_SIGN_FIELD:
370 c = u'-';
371 break;
372 case UNUM_INTEGER_FIELD:
373 c = u'i';
374 break;
375 case UNUM_FRACTION_FIELD:
376 c = u'f';
377 break;
378 case UNUM_EXPONENT_FIELD:
379 c = u'e';
380 break;
381 case UNUM_EXPONENT_SIGN_FIELD:
382 c = u'+';
383 break;
384 case UNUM_EXPONENT_SYMBOL_FIELD:
385 c = u'E';
386 break;
387 case UNUM_DECIMAL_SEPARATOR_FIELD:
388 c = u'.';
389 break;
390 case UNUM_GROUPING_SEPARATOR_FIELD:
391 c = u',';
392 break;
393 case UNUM_PERCENT_FIELD:
394 c = u'%';
395 break;
396 case UNUM_PERMILL_FIELD:
397 c = u'‰';
398 break;
399 case UNUM_CURRENCY_FIELD:
400 c = u'$';
401 break;
402 default:
403 c = u'0' + fieldAt(i).getField();
404 break;
405 }
406 sb.append(c);
407 } else {
408 sb.append(u'0' + fieldAt(i).getCategory());
409 }
410 }
411 sb.append(u"]>", -1);
412 return sb;
413}
414
415const char16_t *FormattedStringBuilder::chars() const {
416 return getCharPtr() + fZero;
417}
418
419bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
420 if (fLength != other.fLength) {
421 return false;
422 }
423 for (int32_t i = 0; i < fLength; i++) {
424 if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
425 return false;
426 }
427 }
428 return true;
429}
430
431bool FormattedStringBuilder::containsField(Field field) const {
432 for (int32_t i = 0; i < fLength; i++) {
433 if (field == fieldAt(i)) {
434 return true;
435 }
436 }
437 return false;
438}
439
440U_NAMESPACE_END
441
442#endif /* #if !UCONFIG_NO_FORMATTING */
443