| 1 | // © 2017 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | |
| 4 | #include "unicode/utypes.h" |
| 5 | |
| 6 | #if !UCONFIG_NO_FORMATTING |
| 7 | |
| 8 | #include "formatted_string_builder.h" |
| 9 | #include "unicode/ustring.h" |
| 10 | #include "unicode/utf16.h" |
| 11 | |
| 12 | namespace { |
| 13 | |
| 14 | // A version of uprv_memcpy that checks for length 0. |
| 15 | // By default, uprv_memcpy requires a length of at least 1. |
| 16 | inline void uprv_memcpy2(void* dest, const void* src, size_t len) { |
| 17 | if (len > 0) { |
| 18 | uprv_memcpy(dest, src, len); |
| 19 | } |
| 20 | } |
| 21 | |
| 22 | // A version of uprv_memmove that checks for length 0. |
| 23 | // By default, uprv_memmove requires a length of at least 1. |
| 24 | inline void uprv_memmove2(void* dest, const void* src, size_t len) { |
| 25 | if (len > 0) { |
| 26 | uprv_memmove(dest, src, len); |
| 27 | } |
| 28 | } |
| 29 | |
| 30 | } // namespace |
| 31 | |
| 32 | |
| 33 | U_NAMESPACE_BEGIN |
| 34 | |
| 35 | FormattedStringBuilder::FormattedStringBuilder() { |
| 36 | #if U_DEBUG |
| 37 | // Initializing the memory to non-zero helps catch some bugs that involve |
| 38 | // reading from an improperly terminated string. |
| 39 | for (int32_t i=0; i<getCapacity(); i++) { |
| 40 | getCharPtr()[i] = 1; |
| 41 | } |
| 42 | #endif |
| 43 | } |
| 44 | |
| 45 | FormattedStringBuilder::~FormattedStringBuilder() { |
| 46 | if (fUsingHeap) { |
| 47 | uprv_free(fChars.heap.ptr); |
| 48 | uprv_free(fFields.heap.ptr); |
| 49 | } |
| 50 | } |
| 51 | |
| 52 | FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) { |
| 53 | *this = other; |
| 54 | } |
| 55 | |
| 56 | FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) { |
| 57 | // Check for self-assignment |
| 58 | if (this == &other) { |
| 59 | return *this; |
| 60 | } |
| 61 | |
| 62 | // Continue with deallocation and copying |
| 63 | if (fUsingHeap) { |
| 64 | uprv_free(fChars.heap.ptr); |
| 65 | uprv_free(fFields.heap.ptr); |
| 66 | fUsingHeap = false; |
| 67 | } |
| 68 | |
| 69 | int32_t capacity = other.getCapacity(); |
| 70 | if (capacity > DEFAULT_CAPACITY) { |
| 71 | // FIXME: uprv_malloc |
| 72 | // C++ note: malloc appears in two places: here and in prepareForInsertHelper. |
| 73 | auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity)); |
| 74 | auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity)); |
| 75 | if (newChars == nullptr || newFields == nullptr) { |
| 76 | // UErrorCode is not available; fail silently. |
| 77 | uprv_free(newChars); |
| 78 | uprv_free(newFields); |
| 79 | *this = FormattedStringBuilder(); // can't fail |
| 80 | return *this; |
| 81 | } |
| 82 | |
| 83 | fUsingHeap = true; |
| 84 | fChars.heap.capacity = capacity; |
| 85 | fChars.heap.ptr = newChars; |
| 86 | fFields.heap.capacity = capacity; |
| 87 | fFields.heap.ptr = newFields; |
| 88 | } |
| 89 | |
| 90 | uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity); |
| 91 | uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity); |
| 92 | |
| 93 | fZero = other.fZero; |
| 94 | fLength = other.fLength; |
| 95 | return *this; |
| 96 | } |
| 97 | |
| 98 | int32_t FormattedStringBuilder::length() const { |
| 99 | return fLength; |
| 100 | } |
| 101 | |
| 102 | int32_t FormattedStringBuilder::codePointCount() const { |
| 103 | return u_countChar32(getCharPtr() + fZero, fLength); |
| 104 | } |
| 105 | |
| 106 | UChar32 FormattedStringBuilder::getFirstCodePoint() const { |
| 107 | if (fLength == 0) { |
| 108 | return -1; |
| 109 | } |
| 110 | UChar32 cp; |
| 111 | U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp); |
| 112 | return cp; |
| 113 | } |
| 114 | |
| 115 | UChar32 FormattedStringBuilder::getLastCodePoint() const { |
| 116 | if (fLength == 0) { |
| 117 | return -1; |
| 118 | } |
| 119 | int32_t offset = fLength; |
| 120 | U16_BACK_1(getCharPtr() + fZero, 0, offset); |
| 121 | UChar32 cp; |
| 122 | U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); |
| 123 | return cp; |
| 124 | } |
| 125 | |
| 126 | UChar32 FormattedStringBuilder::codePointAt(int32_t index) const { |
| 127 | UChar32 cp; |
| 128 | U16_GET(getCharPtr() + fZero, 0, index, fLength, cp); |
| 129 | return cp; |
| 130 | } |
| 131 | |
| 132 | UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const { |
| 133 | int32_t offset = index; |
| 134 | U16_BACK_1(getCharPtr() + fZero, 0, offset); |
| 135 | UChar32 cp; |
| 136 | U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); |
| 137 | return cp; |
| 138 | } |
| 139 | |
| 140 | FormattedStringBuilder &FormattedStringBuilder::clear() { |
| 141 | // TODO: Reset the heap here? |
| 142 | fZero = getCapacity() / 2; |
| 143 | fLength = 0; |
| 144 | return *this; |
| 145 | } |
| 146 | |
| 147 | int32_t |
| 148 | FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) { |
| 149 | int32_t count = U16_LENGTH(codePoint); |
| 150 | int32_t position = prepareForInsert(index, count, status); |
| 151 | if (U_FAILURE(status)) { |
| 152 | return count; |
| 153 | } |
| 154 | if (count == 1) { |
| 155 | getCharPtr()[position] = (char16_t) codePoint; |
| 156 | getFieldPtr()[position] = field; |
| 157 | } else { |
| 158 | getCharPtr()[position] = U16_LEAD(codePoint); |
| 159 | getCharPtr()[position + 1] = U16_TRAIL(codePoint); |
| 160 | getFieldPtr()[position] = getFieldPtr()[position + 1] = field; |
| 161 | } |
| 162 | return count; |
| 163 | } |
| 164 | |
| 165 | int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field, |
| 166 | UErrorCode &status) { |
| 167 | if (unistr.length() == 0) { |
| 168 | // Nothing to insert. |
| 169 | return 0; |
| 170 | } else if (unistr.length() == 1) { |
| 171 | // Fast path: insert using insertCodePoint. |
| 172 | return insertCodePoint(index, unistr.charAt(0), field, status); |
| 173 | } else { |
| 174 | return insert(index, unistr, 0, unistr.length(), field, status); |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | int32_t |
| 179 | FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, |
| 180 | Field field, UErrorCode &status) { |
| 181 | int32_t count = end - start; |
| 182 | int32_t position = prepareForInsert(index, count, status); |
| 183 | if (U_FAILURE(status)) { |
| 184 | return count; |
| 185 | } |
| 186 | for (int32_t i = 0; i < count; i++) { |
| 187 | getCharPtr()[position + i] = unistr.charAt(start + i); |
| 188 | getFieldPtr()[position + i] = field; |
| 189 | } |
| 190 | return count; |
| 191 | } |
| 192 | |
| 193 | int32_t |
| 194 | FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, |
| 195 | int32_t startOther, int32_t endOther, Field field, UErrorCode& status) { |
| 196 | int32_t thisLength = endThis - startThis; |
| 197 | int32_t otherLength = endOther - startOther; |
| 198 | int32_t count = otherLength - thisLength; |
| 199 | int32_t position; |
| 200 | if (count > 0) { |
| 201 | // Overall, chars need to be added. |
| 202 | position = prepareForInsert(startThis, count, status); |
| 203 | } else { |
| 204 | // Overall, chars need to be removed or kept the same. |
| 205 | position = remove(startThis, -count); |
| 206 | } |
| 207 | if (U_FAILURE(status)) { |
| 208 | return count; |
| 209 | } |
| 210 | for (int32_t i = 0; i < otherLength; i++) { |
| 211 | getCharPtr()[position + i] = unistr.charAt(startOther + i); |
| 212 | getFieldPtr()[position + i] = field; |
| 213 | } |
| 214 | return count; |
| 215 | } |
| 216 | |
| 217 | int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) { |
| 218 | return insert(fLength, other, status); |
| 219 | } |
| 220 | |
| 221 | int32_t |
| 222 | FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) { |
| 223 | if (this == &other) { |
| 224 | status = U_ILLEGAL_ARGUMENT_ERROR; |
| 225 | return 0; |
| 226 | } |
| 227 | int32_t count = other.fLength; |
| 228 | if (count == 0) { |
| 229 | // Nothing to insert. |
| 230 | return 0; |
| 231 | } |
| 232 | int32_t position = prepareForInsert(index, count, status); |
| 233 | if (U_FAILURE(status)) { |
| 234 | return count; |
| 235 | } |
| 236 | for (int32_t i = 0; i < count; i++) { |
| 237 | getCharPtr()[position + i] = other.charAt(i); |
| 238 | getFieldPtr()[position + i] = other.fieldAt(i); |
| 239 | } |
| 240 | return count; |
| 241 | } |
| 242 | |
| 243 | void FormattedStringBuilder::writeTerminator(UErrorCode& status) { |
| 244 | int32_t position = prepareForInsert(fLength, 1, status); |
| 245 | if (U_FAILURE(status)) { |
| 246 | return; |
| 247 | } |
| 248 | getCharPtr()[position] = 0; |
| 249 | getFieldPtr()[position] = UNUM_FIELD_COUNT; |
| 250 | fLength--; |
| 251 | } |
| 252 | |
| 253 | int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) { |
| 254 | U_ASSERT(index >= 0); |
| 255 | U_ASSERT(index <= fLength); |
| 256 | U_ASSERT(count >= 0); |
| 257 | if (index == 0 && fZero - count >= 0) { |
| 258 | // Append to start |
| 259 | fZero -= count; |
| 260 | fLength += count; |
| 261 | return fZero; |
| 262 | } else if (index == fLength && fZero + fLength + count < getCapacity()) { |
| 263 | // Append to end |
| 264 | fLength += count; |
| 265 | return fZero + fLength - count; |
| 266 | } else { |
| 267 | // Move chars around and/or allocate more space |
| 268 | return prepareForInsertHelper(index, count, status); |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) { |
| 273 | int32_t oldCapacity = getCapacity(); |
| 274 | int32_t oldZero = fZero; |
| 275 | char16_t *oldChars = getCharPtr(); |
| 276 | Field *oldFields = getFieldPtr(); |
| 277 | if (fLength + count > oldCapacity) { |
| 278 | int32_t newCapacity = (fLength + count) * 2; |
| 279 | int32_t newZero = newCapacity / 2 - (fLength + count) / 2; |
| 280 | |
| 281 | // C++ note: malloc appears in two places: here and in the assignment operator. |
| 282 | auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity)); |
| 283 | auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity)); |
| 284 | if (newChars == nullptr || newFields == nullptr) { |
| 285 | uprv_free(newChars); |
| 286 | uprv_free(newFields); |
| 287 | status = U_MEMORY_ALLOCATION_ERROR; |
| 288 | return -1; |
| 289 | } |
| 290 | |
| 291 | // First copy the prefix and then the suffix, leaving room for the new chars that the |
| 292 | // caller wants to insert. |
| 293 | // C++ note: memcpy is OK because the src and dest do not overlap. |
| 294 | uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index); |
| 295 | uprv_memcpy2(newChars + newZero + index + count, |
| 296 | oldChars + oldZero + index, |
| 297 | sizeof(char16_t) * (fLength - index)); |
| 298 | uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index); |
| 299 | uprv_memcpy2(newFields + newZero + index + count, |
| 300 | oldFields + oldZero + index, |
| 301 | sizeof(Field) * (fLength - index)); |
| 302 | |
| 303 | if (fUsingHeap) { |
| 304 | uprv_free(oldChars); |
| 305 | uprv_free(oldFields); |
| 306 | } |
| 307 | fUsingHeap = true; |
| 308 | fChars.heap.ptr = newChars; |
| 309 | fChars.heap.capacity = newCapacity; |
| 310 | fFields.heap.ptr = newFields; |
| 311 | fFields.heap.capacity = newCapacity; |
| 312 | fZero = newZero; |
| 313 | fLength += count; |
| 314 | } else { |
| 315 | int32_t newZero = oldCapacity / 2 - (fLength + count) / 2; |
| 316 | |
| 317 | // C++ note: memmove is required because src and dest may overlap. |
| 318 | // First copy the entire string to the location of the prefix, and then move the suffix |
| 319 | // to make room for the new chars that the caller wants to insert. |
| 320 | uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength); |
| 321 | uprv_memmove2(oldChars + newZero + index + count, |
| 322 | oldChars + newZero + index, |
| 323 | sizeof(char16_t) * (fLength - index)); |
| 324 | uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength); |
| 325 | uprv_memmove2(oldFields + newZero + index + count, |
| 326 | oldFields + newZero + index, |
| 327 | sizeof(Field) * (fLength - index)); |
| 328 | |
| 329 | fZero = newZero; |
| 330 | fLength += count; |
| 331 | } |
| 332 | return fZero + index; |
| 333 | } |
| 334 | |
| 335 | int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) { |
| 336 | // TODO: Reset the heap here? (If the string after removal can fit on stack?) |
| 337 | int32_t position = index + fZero; |
| 338 | uprv_memmove2(getCharPtr() + position, |
| 339 | getCharPtr() + position + count, |
| 340 | sizeof(char16_t) * (fLength - index - count)); |
| 341 | uprv_memmove2(getFieldPtr() + position, |
| 342 | getFieldPtr() + position + count, |
| 343 | sizeof(Field) * (fLength - index - count)); |
| 344 | fLength -= count; |
| 345 | return position; |
| 346 | } |
| 347 | |
| 348 | UnicodeString FormattedStringBuilder::toUnicodeString() const { |
| 349 | return UnicodeString(getCharPtr() + fZero, fLength); |
| 350 | } |
| 351 | |
| 352 | const UnicodeString FormattedStringBuilder::toTempUnicodeString() const { |
| 353 | // Readonly-alias constructor: |
| 354 | return UnicodeString(FALSE, getCharPtr() + fZero, fLength); |
| 355 | } |
| 356 | |
| 357 | UnicodeString FormattedStringBuilder::toDebugString() const { |
| 358 | UnicodeString sb; |
| 359 | sb.append(u"<FormattedStringBuilder [" , -1); |
| 360 | sb.append(toUnicodeString()); |
| 361 | sb.append(u"] [" , -1); |
| 362 | for (int i = 0; i < fLength; i++) { |
| 363 | if (fieldAt(i) == UNUM_FIELD_COUNT) { |
| 364 | sb.append(u'n'); |
| 365 | } else { |
| 366 | char16_t c; |
| 367 | switch (fieldAt(i)) { |
| 368 | case UNUM_SIGN_FIELD: |
| 369 | c = u'-'; |
| 370 | break; |
| 371 | case UNUM_INTEGER_FIELD: |
| 372 | c = u'i'; |
| 373 | break; |
| 374 | case UNUM_FRACTION_FIELD: |
| 375 | c = u'f'; |
| 376 | break; |
| 377 | case UNUM_EXPONENT_FIELD: |
| 378 | c = u'e'; |
| 379 | break; |
| 380 | case UNUM_EXPONENT_SIGN_FIELD: |
| 381 | c = u'+'; |
| 382 | break; |
| 383 | case UNUM_EXPONENT_SYMBOL_FIELD: |
| 384 | c = u'E'; |
| 385 | break; |
| 386 | case UNUM_DECIMAL_SEPARATOR_FIELD: |
| 387 | c = u'.'; |
| 388 | break; |
| 389 | case UNUM_GROUPING_SEPARATOR_FIELD: |
| 390 | c = u','; |
| 391 | break; |
| 392 | case UNUM_PERCENT_FIELD: |
| 393 | c = u'%'; |
| 394 | break; |
| 395 | case UNUM_PERMILL_FIELD: |
| 396 | c = u'‰'; |
| 397 | break; |
| 398 | case UNUM_CURRENCY_FIELD: |
| 399 | c = u'$'; |
| 400 | break; |
| 401 | default: |
| 402 | c = u'?'; |
| 403 | break; |
| 404 | } |
| 405 | sb.append(c); |
| 406 | } |
| 407 | } |
| 408 | sb.append(u"]>" , -1); |
| 409 | return sb; |
| 410 | } |
| 411 | |
| 412 | const char16_t *FormattedStringBuilder::chars() const { |
| 413 | return getCharPtr() + fZero; |
| 414 | } |
| 415 | |
| 416 | bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const { |
| 417 | if (fLength != other.fLength) { |
| 418 | return false; |
| 419 | } |
| 420 | for (int32_t i = 0; i < fLength; i++) { |
| 421 | if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) { |
| 422 | return false; |
| 423 | } |
| 424 | } |
| 425 | return true; |
| 426 | } |
| 427 | |
| 428 | bool FormattedStringBuilder::containsField(Field field) const { |
| 429 | for (int32_t i = 0; i < fLength; i++) { |
| 430 | if (field == fieldAt(i)) { |
| 431 | return true; |
| 432 | } |
| 433 | } |
| 434 | return false; |
| 435 | } |
| 436 | |
| 437 | U_NAMESPACE_END |
| 438 | |
| 439 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
| 440 | |