1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | #include "unicode/utypes.h" |
5 | |
6 | #if !UCONFIG_NO_FORMATTING |
7 | |
8 | #include "formatted_string_builder.h" |
9 | #include "unicode/ustring.h" |
10 | #include "unicode/utf16.h" |
11 | |
12 | namespace { |
13 | |
14 | // A version of uprv_memcpy that checks for length 0. |
15 | // By default, uprv_memcpy requires a length of at least 1. |
16 | inline void uprv_memcpy2(void* dest, const void* src, size_t len) { |
17 | if (len > 0) { |
18 | uprv_memcpy(dest, src, len); |
19 | } |
20 | } |
21 | |
22 | // A version of uprv_memmove that checks for length 0. |
23 | // By default, uprv_memmove requires a length of at least 1. |
24 | inline void uprv_memmove2(void* dest, const void* src, size_t len) { |
25 | if (len > 0) { |
26 | uprv_memmove(dest, src, len); |
27 | } |
28 | } |
29 | |
30 | } // namespace |
31 | |
32 | |
33 | U_NAMESPACE_BEGIN |
34 | |
35 | FormattedStringBuilder::FormattedStringBuilder() { |
36 | #if U_DEBUG |
37 | // Initializing the memory to non-zero helps catch some bugs that involve |
38 | // reading from an improperly terminated string. |
39 | for (int32_t i=0; i<getCapacity(); i++) { |
40 | getCharPtr()[i] = 1; |
41 | } |
42 | #endif |
43 | } |
44 | |
45 | FormattedStringBuilder::~FormattedStringBuilder() { |
46 | if (fUsingHeap) { |
47 | uprv_free(fChars.heap.ptr); |
48 | uprv_free(fFields.heap.ptr); |
49 | } |
50 | } |
51 | |
52 | FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) { |
53 | *this = other; |
54 | } |
55 | |
56 | FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) { |
57 | // Check for self-assignment |
58 | if (this == &other) { |
59 | return *this; |
60 | } |
61 | |
62 | // Continue with deallocation and copying |
63 | if (fUsingHeap) { |
64 | uprv_free(fChars.heap.ptr); |
65 | uprv_free(fFields.heap.ptr); |
66 | fUsingHeap = false; |
67 | } |
68 | |
69 | int32_t capacity = other.getCapacity(); |
70 | if (capacity > DEFAULT_CAPACITY) { |
71 | // FIXME: uprv_malloc |
72 | // C++ note: malloc appears in two places: here and in prepareForInsertHelper. |
73 | auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity)); |
74 | auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity)); |
75 | if (newChars == nullptr || newFields == nullptr) { |
76 | // UErrorCode is not available; fail silently. |
77 | uprv_free(newChars); |
78 | uprv_free(newFields); |
79 | *this = FormattedStringBuilder(); // can't fail |
80 | return *this; |
81 | } |
82 | |
83 | fUsingHeap = true; |
84 | fChars.heap.capacity = capacity; |
85 | fChars.heap.ptr = newChars; |
86 | fFields.heap.capacity = capacity; |
87 | fFields.heap.ptr = newFields; |
88 | } |
89 | |
90 | uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity); |
91 | uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity); |
92 | |
93 | fZero = other.fZero; |
94 | fLength = other.fLength; |
95 | return *this; |
96 | } |
97 | |
98 | int32_t FormattedStringBuilder::length() const { |
99 | return fLength; |
100 | } |
101 | |
102 | int32_t FormattedStringBuilder::codePointCount() const { |
103 | return u_countChar32(getCharPtr() + fZero, fLength); |
104 | } |
105 | |
106 | UChar32 FormattedStringBuilder::getFirstCodePoint() const { |
107 | if (fLength == 0) { |
108 | return -1; |
109 | } |
110 | UChar32 cp; |
111 | U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp); |
112 | return cp; |
113 | } |
114 | |
115 | UChar32 FormattedStringBuilder::getLastCodePoint() const { |
116 | if (fLength == 0) { |
117 | return -1; |
118 | } |
119 | int32_t offset = fLength; |
120 | U16_BACK_1(getCharPtr() + fZero, 0, offset); |
121 | UChar32 cp; |
122 | U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); |
123 | return cp; |
124 | } |
125 | |
126 | UChar32 FormattedStringBuilder::codePointAt(int32_t index) const { |
127 | UChar32 cp; |
128 | U16_GET(getCharPtr() + fZero, 0, index, fLength, cp); |
129 | return cp; |
130 | } |
131 | |
132 | UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const { |
133 | int32_t offset = index; |
134 | U16_BACK_1(getCharPtr() + fZero, 0, offset); |
135 | UChar32 cp; |
136 | U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); |
137 | return cp; |
138 | } |
139 | |
140 | FormattedStringBuilder &FormattedStringBuilder::clear() { |
141 | // TODO: Reset the heap here? |
142 | fZero = getCapacity() / 2; |
143 | fLength = 0; |
144 | return *this; |
145 | } |
146 | |
147 | int32_t |
148 | FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) { |
149 | int32_t count = U16_LENGTH(codePoint); |
150 | int32_t position = prepareForInsert(index, count, status); |
151 | if (U_FAILURE(status)) { |
152 | return count; |
153 | } |
154 | if (count == 1) { |
155 | getCharPtr()[position] = (char16_t) codePoint; |
156 | getFieldPtr()[position] = field; |
157 | } else { |
158 | getCharPtr()[position] = U16_LEAD(codePoint); |
159 | getCharPtr()[position + 1] = U16_TRAIL(codePoint); |
160 | getFieldPtr()[position] = getFieldPtr()[position + 1] = field; |
161 | } |
162 | return count; |
163 | } |
164 | |
165 | int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field, |
166 | UErrorCode &status) { |
167 | if (unistr.length() == 0) { |
168 | // Nothing to insert. |
169 | return 0; |
170 | } else if (unistr.length() == 1) { |
171 | // Fast path: insert using insertCodePoint. |
172 | return insertCodePoint(index, unistr.charAt(0), field, status); |
173 | } else { |
174 | return insert(index, unistr, 0, unistr.length(), field, status); |
175 | } |
176 | } |
177 | |
178 | int32_t |
179 | FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, |
180 | Field field, UErrorCode &status) { |
181 | int32_t count = end - start; |
182 | int32_t position = prepareForInsert(index, count, status); |
183 | if (U_FAILURE(status)) { |
184 | return count; |
185 | } |
186 | for (int32_t i = 0; i < count; i++) { |
187 | getCharPtr()[position + i] = unistr.charAt(start + i); |
188 | getFieldPtr()[position + i] = field; |
189 | } |
190 | return count; |
191 | } |
192 | |
193 | int32_t |
194 | FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, |
195 | int32_t startOther, int32_t endOther, Field field, UErrorCode& status) { |
196 | int32_t thisLength = endThis - startThis; |
197 | int32_t otherLength = endOther - startOther; |
198 | int32_t count = otherLength - thisLength; |
199 | int32_t position; |
200 | if (count > 0) { |
201 | // Overall, chars need to be added. |
202 | position = prepareForInsert(startThis, count, status); |
203 | } else { |
204 | // Overall, chars need to be removed or kept the same. |
205 | position = remove(startThis, -count); |
206 | } |
207 | if (U_FAILURE(status)) { |
208 | return count; |
209 | } |
210 | for (int32_t i = 0; i < otherLength; i++) { |
211 | getCharPtr()[position + i] = unistr.charAt(startOther + i); |
212 | getFieldPtr()[position + i] = field; |
213 | } |
214 | return count; |
215 | } |
216 | |
217 | int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) { |
218 | return insert(fLength, other, status); |
219 | } |
220 | |
221 | int32_t |
222 | FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) { |
223 | if (this == &other) { |
224 | status = U_ILLEGAL_ARGUMENT_ERROR; |
225 | return 0; |
226 | } |
227 | int32_t count = other.fLength; |
228 | if (count == 0) { |
229 | // Nothing to insert. |
230 | return 0; |
231 | } |
232 | int32_t position = prepareForInsert(index, count, status); |
233 | if (U_FAILURE(status)) { |
234 | return count; |
235 | } |
236 | for (int32_t i = 0; i < count; i++) { |
237 | getCharPtr()[position + i] = other.charAt(i); |
238 | getFieldPtr()[position + i] = other.fieldAt(i); |
239 | } |
240 | return count; |
241 | } |
242 | |
243 | void FormattedStringBuilder::writeTerminator(UErrorCode& status) { |
244 | int32_t position = prepareForInsert(fLength, 1, status); |
245 | if (U_FAILURE(status)) { |
246 | return; |
247 | } |
248 | getCharPtr()[position] = 0; |
249 | getFieldPtr()[position] = UNUM_FIELD_COUNT; |
250 | fLength--; |
251 | } |
252 | |
253 | int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) { |
254 | U_ASSERT(index >= 0); |
255 | U_ASSERT(index <= fLength); |
256 | U_ASSERT(count >= 0); |
257 | if (index == 0 && fZero - count >= 0) { |
258 | // Append to start |
259 | fZero -= count; |
260 | fLength += count; |
261 | return fZero; |
262 | } else if (index == fLength && fZero + fLength + count < getCapacity()) { |
263 | // Append to end |
264 | fLength += count; |
265 | return fZero + fLength - count; |
266 | } else { |
267 | // Move chars around and/or allocate more space |
268 | return prepareForInsertHelper(index, count, status); |
269 | } |
270 | } |
271 | |
272 | int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) { |
273 | int32_t oldCapacity = getCapacity(); |
274 | int32_t oldZero = fZero; |
275 | char16_t *oldChars = getCharPtr(); |
276 | Field *oldFields = getFieldPtr(); |
277 | if (fLength + count > oldCapacity) { |
278 | int32_t newCapacity = (fLength + count) * 2; |
279 | int32_t newZero = newCapacity / 2 - (fLength + count) / 2; |
280 | |
281 | // C++ note: malloc appears in two places: here and in the assignment operator. |
282 | auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity)); |
283 | auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity)); |
284 | if (newChars == nullptr || newFields == nullptr) { |
285 | uprv_free(newChars); |
286 | uprv_free(newFields); |
287 | status = U_MEMORY_ALLOCATION_ERROR; |
288 | return -1; |
289 | } |
290 | |
291 | // First copy the prefix and then the suffix, leaving room for the new chars that the |
292 | // caller wants to insert. |
293 | // C++ note: memcpy is OK because the src and dest do not overlap. |
294 | uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index); |
295 | uprv_memcpy2(newChars + newZero + index + count, |
296 | oldChars + oldZero + index, |
297 | sizeof(char16_t) * (fLength - index)); |
298 | uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index); |
299 | uprv_memcpy2(newFields + newZero + index + count, |
300 | oldFields + oldZero + index, |
301 | sizeof(Field) * (fLength - index)); |
302 | |
303 | if (fUsingHeap) { |
304 | uprv_free(oldChars); |
305 | uprv_free(oldFields); |
306 | } |
307 | fUsingHeap = true; |
308 | fChars.heap.ptr = newChars; |
309 | fChars.heap.capacity = newCapacity; |
310 | fFields.heap.ptr = newFields; |
311 | fFields.heap.capacity = newCapacity; |
312 | fZero = newZero; |
313 | fLength += count; |
314 | } else { |
315 | int32_t newZero = oldCapacity / 2 - (fLength + count) / 2; |
316 | |
317 | // C++ note: memmove is required because src and dest may overlap. |
318 | // First copy the entire string to the location of the prefix, and then move the suffix |
319 | // to make room for the new chars that the caller wants to insert. |
320 | uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength); |
321 | uprv_memmove2(oldChars + newZero + index + count, |
322 | oldChars + newZero + index, |
323 | sizeof(char16_t) * (fLength - index)); |
324 | uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength); |
325 | uprv_memmove2(oldFields + newZero + index + count, |
326 | oldFields + newZero + index, |
327 | sizeof(Field) * (fLength - index)); |
328 | |
329 | fZero = newZero; |
330 | fLength += count; |
331 | } |
332 | return fZero + index; |
333 | } |
334 | |
335 | int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) { |
336 | // TODO: Reset the heap here? (If the string after removal can fit on stack?) |
337 | int32_t position = index + fZero; |
338 | uprv_memmove2(getCharPtr() + position, |
339 | getCharPtr() + position + count, |
340 | sizeof(char16_t) * (fLength - index - count)); |
341 | uprv_memmove2(getFieldPtr() + position, |
342 | getFieldPtr() + position + count, |
343 | sizeof(Field) * (fLength - index - count)); |
344 | fLength -= count; |
345 | return position; |
346 | } |
347 | |
348 | UnicodeString FormattedStringBuilder::toUnicodeString() const { |
349 | return UnicodeString(getCharPtr() + fZero, fLength); |
350 | } |
351 | |
352 | const UnicodeString FormattedStringBuilder::toTempUnicodeString() const { |
353 | // Readonly-alias constructor: |
354 | return UnicodeString(FALSE, getCharPtr() + fZero, fLength); |
355 | } |
356 | |
357 | UnicodeString FormattedStringBuilder::toDebugString() const { |
358 | UnicodeString sb; |
359 | sb.append(u"<FormattedStringBuilder [" , -1); |
360 | sb.append(toUnicodeString()); |
361 | sb.append(u"] [" , -1); |
362 | for (int i = 0; i < fLength; i++) { |
363 | if (fieldAt(i) == UNUM_FIELD_COUNT) { |
364 | sb.append(u'n'); |
365 | } else { |
366 | char16_t c; |
367 | switch (fieldAt(i)) { |
368 | case UNUM_SIGN_FIELD: |
369 | c = u'-'; |
370 | break; |
371 | case UNUM_INTEGER_FIELD: |
372 | c = u'i'; |
373 | break; |
374 | case UNUM_FRACTION_FIELD: |
375 | c = u'f'; |
376 | break; |
377 | case UNUM_EXPONENT_FIELD: |
378 | c = u'e'; |
379 | break; |
380 | case UNUM_EXPONENT_SIGN_FIELD: |
381 | c = u'+'; |
382 | break; |
383 | case UNUM_EXPONENT_SYMBOL_FIELD: |
384 | c = u'E'; |
385 | break; |
386 | case UNUM_DECIMAL_SEPARATOR_FIELD: |
387 | c = u'.'; |
388 | break; |
389 | case UNUM_GROUPING_SEPARATOR_FIELD: |
390 | c = u','; |
391 | break; |
392 | case UNUM_PERCENT_FIELD: |
393 | c = u'%'; |
394 | break; |
395 | case UNUM_PERMILL_FIELD: |
396 | c = u'‰'; |
397 | break; |
398 | case UNUM_CURRENCY_FIELD: |
399 | c = u'$'; |
400 | break; |
401 | default: |
402 | c = u'?'; |
403 | break; |
404 | } |
405 | sb.append(c); |
406 | } |
407 | } |
408 | sb.append(u"]>" , -1); |
409 | return sb; |
410 | } |
411 | |
412 | const char16_t *FormattedStringBuilder::chars() const { |
413 | return getCharPtr() + fZero; |
414 | } |
415 | |
416 | bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const { |
417 | if (fLength != other.fLength) { |
418 | return false; |
419 | } |
420 | for (int32_t i = 0; i < fLength; i++) { |
421 | if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) { |
422 | return false; |
423 | } |
424 | } |
425 | return true; |
426 | } |
427 | |
428 | bool FormattedStringBuilder::containsField(Field field) const { |
429 | for (int32_t i = 0; i < fLength; i++) { |
430 | if (field == fieldAt(i)) { |
431 | return true; |
432 | } |
433 | } |
434 | return false; |
435 | } |
436 | |
437 | U_NAMESPACE_END |
438 | |
439 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
440 | |