1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7#ifndef __NUMBER_STRINGBUILDER_H__
8#define __NUMBER_STRINGBUILDER_H__
9
10
11#include <cstdint>
12#include <type_traits>
13
14#include "cstring.h"
15#include "uassert.h"
16#include "fphdlimp.h"
17
18U_NAMESPACE_BEGIN
19
20class FormattedValueStringBuilderImpl;
21
22/**
23 * A StringBuilder optimized for formatting. It implements the following key
24 * features beyond a UnicodeString:
25 *
26 * <ol>
27 * <li>Efficient prepend as well as append.
28 * <li>Keeps tracks of Fields in an efficient manner.
29 * </ol>
30 *
31 * See also FormattedValueStringBuilderImpl.
32 *
33 * @author sffc (Shane Carr)
34 */
35class U_I18N_API FormattedStringBuilder : public UMemory {
36 private:
37 static const int32_t DEFAULT_CAPACITY = 40;
38
39 template<typename T>
40 union ValueOrHeapArray {
41 T value[DEFAULT_CAPACITY];
42 struct {
43 T *ptr;
44 int32_t capacity;
45 } heap;
46 };
47
48 public:
49 FormattedStringBuilder();
50
51 ~FormattedStringBuilder();
52
53 FormattedStringBuilder(const FormattedStringBuilder &other);
54
55 // Convention: bottom 4 bits for field, top 4 bits for field category.
56 // Field category 0 implies the number category so that the number field
57 // literals can be directly passed as a Field type.
58 // See the helper functions in "StringBuilderFieldUtils" below.
59 // Exported as U_I18N_API so it can be used by other exports on Windows.
60 struct U_I18N_API Field {
61 uint8_t bits;
62
63 Field() = default;
64 constexpr Field(uint8_t category, uint8_t field);
65
66 inline UFieldCategory getCategory() const;
67 inline int32_t getField() const;
68 inline bool isNumeric() const;
69 inline bool isUndefined() const;
70 inline bool operator==(const Field& other) const;
71 inline bool operator!=(const Field& other) const;
72 };
73
74 FormattedStringBuilder &operator=(const FormattedStringBuilder &other);
75
76 int32_t length() const;
77
78 int32_t codePointCount() const;
79
80 inline char16_t charAt(int32_t index) const {
81 U_ASSERT(index >= 0);
82 U_ASSERT(index < fLength);
83 return getCharPtr()[fZero + index];
84 }
85
86 inline Field fieldAt(int32_t index) const {
87 U_ASSERT(index >= 0);
88 U_ASSERT(index < fLength);
89 return getFieldPtr()[fZero + index];
90 }
91
92 UChar32 getFirstCodePoint() const;
93
94 UChar32 getLastCodePoint() const;
95
96 UChar32 codePointAt(int32_t index) const;
97
98 UChar32 codePointBefore(int32_t index) const;
99
100 FormattedStringBuilder &clear();
101
102 /** Appends a UTF-16 code unit. */
103 inline int32_t appendChar16(char16_t codeUnit, Field field, UErrorCode& status) {
104 // appendCodePoint handles both code units and code points.
105 return insertCodePoint(fLength, codeUnit, field, status);
106 }
107
108 /** Inserts a UTF-16 code unit. Note: insert at index 0 is very efficient. */
109 inline int32_t insertChar16(int32_t index, char16_t codeUnit, Field field, UErrorCode& status) {
110 // insertCodePoint handles both code units and code points.
111 return insertCodePoint(index, codeUnit, field, status);
112 }
113
114 /** Appends a Unicode code point. */
115 inline int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
116 return insertCodePoint(fLength, codePoint, field, status);
117 }
118
119 /** Inserts a Unicode code point. Note: insert at index 0 is very efficient. */
120 int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status);
121
122 /** Appends a string. */
123 inline int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status) {
124 return insert(fLength, unistr, field, status);
125 }
126
127 /** Inserts a string. Note: insert at index 0 is very efficient. */
128 int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status);
129
130 /** Inserts a substring. Note: insert at index 0 is very efficient.
131 *
132 * @param start Start index of the substring of unistr to be inserted.
133 * @param end End index of the substring of unistr to be inserted (exclusive).
134 */
135 int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field,
136 UErrorCode &status);
137
138 /** Deletes a substring and then inserts a string at that same position.
139 * Similar to JavaScript Array.prototype.splice().
140 *
141 * @param startThis Start of the span to delete.
142 * @param endThis End of the span to delete (exclusive).
143 * @param unistr The string to insert at the deletion position.
144 * @param startOther Start index of the substring of unistr to be inserted.
145 * @param endOther End index of the substring of unistr to be inserted (exclusive).
146 */
147 int32_t splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
148 int32_t startOther, int32_t endOther, Field field, UErrorCode& status);
149
150 /** Appends a formatted string. */
151 int32_t append(const FormattedStringBuilder &other, UErrorCode &status);
152
153 /** Inserts a formatted string. Note: insert at index 0 is very efficient. */
154 int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status);
155
156 /**
157 * Ensures that the string buffer contains a NUL terminator. The NUL terminator does
158 * not count toward the string length. Any further changes to the string (insert or
159 * append) may invalidate the NUL terminator.
160 *
161 * You should call this method after the formatted string is completely built if you
162 * plan to return a pointer to the string from a C API.
163 */
164 void writeTerminator(UErrorCode& status);
165
166 /**
167 * Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed.
168 */
169 UnicodeString toUnicodeString() const;
170
171 /**
172 * Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and
173 * unchanged. Slightly faster than toUnicodeString().
174 */
175 const UnicodeString toTempUnicodeString() const;
176
177 UnicodeString toDebugString() const;
178
179 const char16_t *chars() const;
180
181 bool contentEquals(const FormattedStringBuilder &other) const;
182
183 bool containsField(Field field) const;
184
185 private:
186 bool fUsingHeap = false;
187 ValueOrHeapArray<char16_t> fChars;
188 ValueOrHeapArray<Field> fFields;
189 int32_t fZero = DEFAULT_CAPACITY / 2;
190 int32_t fLength = 0;
191
192 inline char16_t *getCharPtr() {
193 return fUsingHeap ? fChars.heap.ptr : fChars.value;
194 }
195
196 inline const char16_t *getCharPtr() const {
197 return fUsingHeap ? fChars.heap.ptr : fChars.value;
198 }
199
200 inline Field *getFieldPtr() {
201 return fUsingHeap ? fFields.heap.ptr : fFields.value;
202 }
203
204 inline const Field *getFieldPtr() const {
205 return fUsingHeap ? fFields.heap.ptr : fFields.value;
206 }
207
208 inline int32_t getCapacity() const {
209 return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY;
210 }
211
212 int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status);
213
214 int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status);
215
216 int32_t remove(int32_t index, int32_t count);
217
218 friend class FormattedValueStringBuilderImpl;
219};
220
221static_assert(
222 std::is_pod<FormattedStringBuilder::Field>::value,
223 "Field should be a POD type for efficient initialization");
224
225constexpr FormattedStringBuilder::Field::Field(uint8_t category, uint8_t field)
226 : bits((
227#if 0 // /ICU-21081
228 U_ASSERT(category <= 0xf),
229 U_ASSERT(field <= 0xf),
230#endif
231 static_cast<uint8_t>((category << 4) | field)
232 )) {}
233
234/**
235 * Internal constant for the undefined field for use in FormattedStringBuilder.
236 */
237constexpr FormattedStringBuilder::Field kUndefinedField = {UFIELD_CATEGORY_UNDEFINED, 0};
238
239/**
240 * Internal field to signal "numeric" when fields are not supported in NumberFormat.
241 */
242constexpr FormattedStringBuilder::Field kGeneralNumericField = {UFIELD_CATEGORY_UNDEFINED, 1};
243
244inline UFieldCategory FormattedStringBuilder::Field::getCategory() const {
245 return static_cast<UFieldCategory>(bits >> 4);
246}
247
248inline int32_t FormattedStringBuilder::Field::getField() const {
249 return bits & 0xf;
250}
251
252inline bool FormattedStringBuilder::Field::isNumeric() const {
253 return getCategory() == UFIELD_CATEGORY_NUMBER || *this == kGeneralNumericField;
254}
255
256inline bool FormattedStringBuilder::Field::isUndefined() const {
257 return getCategory() == UFIELD_CATEGORY_UNDEFINED;
258}
259
260inline bool FormattedStringBuilder::Field::operator==(const Field& other) const {
261 return bits == other.bits;
262}
263
264inline bool FormattedStringBuilder::Field::operator!=(const Field& other) const {
265 return bits != other.bits;
266}
267
268U_NAMESPACE_END
269
270
271#endif //__NUMBER_STRINGBUILDER_H__
272
273#endif /* #if !UCONFIG_NO_FORMATTING */
274