1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7
8#include "formatted_string_builder.h"
9#include "unicode/ustring.h"
10#include "unicode/utf16.h"
11
12namespace {
13
14// A version of uprv_memcpy that checks for length 0.
15// By default, uprv_memcpy requires a length of at least 1.
16inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
17 if (len > 0) {
18 uprv_memcpy(dest, src, len);
19 }
20}
21
22// A version of uprv_memmove that checks for length 0.
23// By default, uprv_memmove requires a length of at least 1.
24inline void uprv_memmove2(void* dest, const void* src, size_t len) {
25 if (len > 0) {
26 uprv_memmove(dest, src, len);
27 }
28}
29
30} // namespace
31
32
33U_NAMESPACE_BEGIN
34
35FormattedStringBuilder::FormattedStringBuilder() {
36#if U_DEBUG
37 // Initializing the memory to non-zero helps catch some bugs that involve
38 // reading from an improperly terminated string.
39 for (int32_t i=0; i<getCapacity(); i++) {
40 getCharPtr()[i] = 1;
41 }
42#endif
43}
44
45FormattedStringBuilder::~FormattedStringBuilder() {
46 if (fUsingHeap) {
47 uprv_free(fChars.heap.ptr);
48 uprv_free(fFields.heap.ptr);
49 }
50}
51
52FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
53 *this = other;
54}
55
56FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
57 // Check for self-assignment
58 if (this == &other) {
59 return *this;
60 }
61
62 // Continue with deallocation and copying
63 if (fUsingHeap) {
64 uprv_free(fChars.heap.ptr);
65 uprv_free(fFields.heap.ptr);
66 fUsingHeap = false;
67 }
68
69 int32_t capacity = other.getCapacity();
70 if (capacity > DEFAULT_CAPACITY) {
71 // FIXME: uprv_malloc
72 // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
73 auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity));
74 auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity));
75 if (newChars == nullptr || newFields == nullptr) {
76 // UErrorCode is not available; fail silently.
77 uprv_free(newChars);
78 uprv_free(newFields);
79 *this = FormattedStringBuilder(); // can't fail
80 return *this;
81 }
82
83 fUsingHeap = true;
84 fChars.heap.capacity = capacity;
85 fChars.heap.ptr = newChars;
86 fFields.heap.capacity = capacity;
87 fFields.heap.ptr = newFields;
88 }
89
90 uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity);
91 uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity);
92
93 fZero = other.fZero;
94 fLength = other.fLength;
95 return *this;
96}
97
98int32_t FormattedStringBuilder::length() const {
99 return fLength;
100}
101
102int32_t FormattedStringBuilder::codePointCount() const {
103 return u_countChar32(getCharPtr() + fZero, fLength);
104}
105
106UChar32 FormattedStringBuilder::getFirstCodePoint() const {
107 if (fLength == 0) {
108 return -1;
109 }
110 UChar32 cp;
111 U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
112 return cp;
113}
114
115UChar32 FormattedStringBuilder::getLastCodePoint() const {
116 if (fLength == 0) {
117 return -1;
118 }
119 int32_t offset = fLength;
120 U16_BACK_1(getCharPtr() + fZero, 0, offset);
121 UChar32 cp;
122 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
123 return cp;
124}
125
126UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
127 UChar32 cp;
128 U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
129 return cp;
130}
131
132UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
133 int32_t offset = index;
134 U16_BACK_1(getCharPtr() + fZero, 0, offset);
135 UChar32 cp;
136 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
137 return cp;
138}
139
140FormattedStringBuilder &FormattedStringBuilder::clear() {
141 // TODO: Reset the heap here?
142 fZero = getCapacity() / 2;
143 fLength = 0;
144 return *this;
145}
146
147int32_t
148FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
149 int32_t count = U16_LENGTH(codePoint);
150 int32_t position = prepareForInsert(index, count, status);
151 if (U_FAILURE(status)) {
152 return count;
153 }
154 if (count == 1) {
155 getCharPtr()[position] = (char16_t) codePoint;
156 getFieldPtr()[position] = field;
157 } else {
158 getCharPtr()[position] = U16_LEAD(codePoint);
159 getCharPtr()[position + 1] = U16_TRAIL(codePoint);
160 getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
161 }
162 return count;
163}
164
165int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
166 UErrorCode &status) {
167 if (unistr.length() == 0) {
168 // Nothing to insert.
169 return 0;
170 } else if (unistr.length() == 1) {
171 // Fast path: insert using insertCodePoint.
172 return insertCodePoint(index, unistr.charAt(0), field, status);
173 } else {
174 return insert(index, unistr, 0, unistr.length(), field, status);
175 }
176}
177
178int32_t
179FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
180 Field field, UErrorCode &status) {
181 int32_t count = end - start;
182 int32_t position = prepareForInsert(index, count, status);
183 if (U_FAILURE(status)) {
184 return count;
185 }
186 for (int32_t i = 0; i < count; i++) {
187 getCharPtr()[position + i] = unistr.charAt(start + i);
188 getFieldPtr()[position + i] = field;
189 }
190 return count;
191}
192
193int32_t
194FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
195 int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
196 int32_t thisLength = endThis - startThis;
197 int32_t otherLength = endOther - startOther;
198 int32_t count = otherLength - thisLength;
199 int32_t position;
200 if (count > 0) {
201 // Overall, chars need to be added.
202 position = prepareForInsert(startThis, count, status);
203 } else {
204 // Overall, chars need to be removed or kept the same.
205 position = remove(startThis, -count);
206 }
207 if (U_FAILURE(status)) {
208 return count;
209 }
210 for (int32_t i = 0; i < otherLength; i++) {
211 getCharPtr()[position + i] = unistr.charAt(startOther + i);
212 getFieldPtr()[position + i] = field;
213 }
214 return count;
215}
216
217int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
218 return insert(fLength, other, status);
219}
220
221int32_t
222FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
223 if (this == &other) {
224 status = U_ILLEGAL_ARGUMENT_ERROR;
225 return 0;
226 }
227 int32_t count = other.fLength;
228 if (count == 0) {
229 // Nothing to insert.
230 return 0;
231 }
232 int32_t position = prepareForInsert(index, count, status);
233 if (U_FAILURE(status)) {
234 return count;
235 }
236 for (int32_t i = 0; i < count; i++) {
237 getCharPtr()[position + i] = other.charAt(i);
238 getFieldPtr()[position + i] = other.fieldAt(i);
239 }
240 return count;
241}
242
243void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
244 int32_t position = prepareForInsert(fLength, 1, status);
245 if (U_FAILURE(status)) {
246 return;
247 }
248 getCharPtr()[position] = 0;
249 getFieldPtr()[position] = UNUM_FIELD_COUNT;
250 fLength--;
251}
252
253int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
254 U_ASSERT(index >= 0);
255 U_ASSERT(index <= fLength);
256 U_ASSERT(count >= 0);
257 if (index == 0 && fZero - count >= 0) {
258 // Append to start
259 fZero -= count;
260 fLength += count;
261 return fZero;
262 } else if (index == fLength && fZero + fLength + count < getCapacity()) {
263 // Append to end
264 fLength += count;
265 return fZero + fLength - count;
266 } else {
267 // Move chars around and/or allocate more space
268 return prepareForInsertHelper(index, count, status);
269 }
270}
271
272int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
273 int32_t oldCapacity = getCapacity();
274 int32_t oldZero = fZero;
275 char16_t *oldChars = getCharPtr();
276 Field *oldFields = getFieldPtr();
277 if (fLength + count > oldCapacity) {
278 int32_t newCapacity = (fLength + count) * 2;
279 int32_t newZero = newCapacity / 2 - (fLength + count) / 2;
280
281 // C++ note: malloc appears in two places: here and in the assignment operator.
282 auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity));
283 auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity));
284 if (newChars == nullptr || newFields == nullptr) {
285 uprv_free(newChars);
286 uprv_free(newFields);
287 status = U_MEMORY_ALLOCATION_ERROR;
288 return -1;
289 }
290
291 // First copy the prefix and then the suffix, leaving room for the new chars that the
292 // caller wants to insert.
293 // C++ note: memcpy is OK because the src and dest do not overlap.
294 uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
295 uprv_memcpy2(newChars + newZero + index + count,
296 oldChars + oldZero + index,
297 sizeof(char16_t) * (fLength - index));
298 uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
299 uprv_memcpy2(newFields + newZero + index + count,
300 oldFields + oldZero + index,
301 sizeof(Field) * (fLength - index));
302
303 if (fUsingHeap) {
304 uprv_free(oldChars);
305 uprv_free(oldFields);
306 }
307 fUsingHeap = true;
308 fChars.heap.ptr = newChars;
309 fChars.heap.capacity = newCapacity;
310 fFields.heap.ptr = newFields;
311 fFields.heap.capacity = newCapacity;
312 fZero = newZero;
313 fLength += count;
314 } else {
315 int32_t newZero = oldCapacity / 2 - (fLength + count) / 2;
316
317 // C++ note: memmove is required because src and dest may overlap.
318 // First copy the entire string to the location of the prefix, and then move the suffix
319 // to make room for the new chars that the caller wants to insert.
320 uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
321 uprv_memmove2(oldChars + newZero + index + count,
322 oldChars + newZero + index,
323 sizeof(char16_t) * (fLength - index));
324 uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
325 uprv_memmove2(oldFields + newZero + index + count,
326 oldFields + newZero + index,
327 sizeof(Field) * (fLength - index));
328
329 fZero = newZero;
330 fLength += count;
331 }
332 return fZero + index;
333}
334
335int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
336 // TODO: Reset the heap here? (If the string after removal can fit on stack?)
337 int32_t position = index + fZero;
338 uprv_memmove2(getCharPtr() + position,
339 getCharPtr() + position + count,
340 sizeof(char16_t) * (fLength - index - count));
341 uprv_memmove2(getFieldPtr() + position,
342 getFieldPtr() + position + count,
343 sizeof(Field) * (fLength - index - count));
344 fLength -= count;
345 return position;
346}
347
348UnicodeString FormattedStringBuilder::toUnicodeString() const {
349 return UnicodeString(getCharPtr() + fZero, fLength);
350}
351
352const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
353 // Readonly-alias constructor:
354 return UnicodeString(FALSE, getCharPtr() + fZero, fLength);
355}
356
357UnicodeString FormattedStringBuilder::toDebugString() const {
358 UnicodeString sb;
359 sb.append(u"<FormattedStringBuilder [", -1);
360 sb.append(toUnicodeString());
361 sb.append(u"] [", -1);
362 for (int i = 0; i < fLength; i++) {
363 if (fieldAt(i) == UNUM_FIELD_COUNT) {
364 sb.append(u'n');
365 } else {
366 char16_t c;
367 switch (fieldAt(i)) {
368 case UNUM_SIGN_FIELD:
369 c = u'-';
370 break;
371 case UNUM_INTEGER_FIELD:
372 c = u'i';
373 break;
374 case UNUM_FRACTION_FIELD:
375 c = u'f';
376 break;
377 case UNUM_EXPONENT_FIELD:
378 c = u'e';
379 break;
380 case UNUM_EXPONENT_SIGN_FIELD:
381 c = u'+';
382 break;
383 case UNUM_EXPONENT_SYMBOL_FIELD:
384 c = u'E';
385 break;
386 case UNUM_DECIMAL_SEPARATOR_FIELD:
387 c = u'.';
388 break;
389 case UNUM_GROUPING_SEPARATOR_FIELD:
390 c = u',';
391 break;
392 case UNUM_PERCENT_FIELD:
393 c = u'%';
394 break;
395 case UNUM_PERMILL_FIELD:
396 c = u'‰';
397 break;
398 case UNUM_CURRENCY_FIELD:
399 c = u'$';
400 break;
401 default:
402 c = u'?';
403 break;
404 }
405 sb.append(c);
406 }
407 }
408 sb.append(u"]>", -1);
409 return sb;
410}
411
412const char16_t *FormattedStringBuilder::chars() const {
413 return getCharPtr() + fZero;
414}
415
416bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
417 if (fLength != other.fLength) {
418 return false;
419 }
420 for (int32_t i = 0; i < fLength; i++) {
421 if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
422 return false;
423 }
424 }
425 return true;
426}
427
428bool FormattedStringBuilder::containsField(Field field) const {
429 for (int32_t i = 0; i < fLength; i++) {
430 if (field == fieldAt(i)) {
431 return true;
432 }
433 }
434 return false;
435}
436
437U_NAMESPACE_END
438
439#endif /* #if !UCONFIG_NO_FORMATTING */
440