1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | #include "unicode/utypes.h" |
5 | |
6 | #if !UCONFIG_NO_FORMATTING |
7 | #ifndef __NUMBER_STRINGBUILDER_H__ |
8 | #define __NUMBER_STRINGBUILDER_H__ |
9 | |
10 | |
11 | #include <cstdint> |
12 | #include <type_traits> |
13 | |
14 | #include "cstring.h" |
15 | #include "uassert.h" |
16 | #include "fphdlimp.h" |
17 | |
18 | U_NAMESPACE_BEGIN |
19 | |
20 | class FormattedValueStringBuilderImpl; |
21 | |
22 | /** |
23 | * A StringBuilder optimized for formatting. It implements the following key |
24 | * features beyond a UnicodeString: |
25 | * |
26 | * <ol> |
27 | * <li>Efficient prepend as well as append. |
28 | * <li>Keeps tracks of Fields in an efficient manner. |
29 | * </ol> |
30 | * |
31 | * See also FormattedValueStringBuilderImpl. |
32 | * |
33 | * @author sffc (Shane Carr) |
34 | */ |
35 | class U_I18N_API FormattedStringBuilder : public UMemory { |
36 | private: |
37 | static const int32_t DEFAULT_CAPACITY = 40; |
38 | |
39 | template<typename T> |
40 | union ValueOrHeapArray { |
41 | T value[DEFAULT_CAPACITY]; |
42 | struct { |
43 | T *ptr; |
44 | int32_t capacity; |
45 | } heap; |
46 | }; |
47 | |
48 | public: |
49 | FormattedStringBuilder(); |
50 | |
51 | ~FormattedStringBuilder(); |
52 | |
53 | FormattedStringBuilder(const FormattedStringBuilder &other); |
54 | |
55 | // Convention: bottom 4 bits for field, top 4 bits for field category. |
56 | // Field category 0 implies the number category so that the number field |
57 | // literals can be directly passed as a Field type. |
58 | // See the helper functions in "StringBuilderFieldUtils" below. |
59 | // Exported as U_I18N_API so it can be used by other exports on Windows. |
60 | struct U_I18N_API Field { |
61 | uint8_t bits; |
62 | |
63 | Field() = default; |
64 | constexpr Field(uint8_t category, uint8_t field); |
65 | |
66 | inline UFieldCategory getCategory() const; |
67 | inline int32_t getField() const; |
68 | inline bool isNumeric() const; |
69 | inline bool isUndefined() const; |
70 | inline bool operator==(const Field& other) const; |
71 | inline bool operator!=(const Field& other) const; |
72 | }; |
73 | |
74 | FormattedStringBuilder &operator=(const FormattedStringBuilder &other); |
75 | |
76 | int32_t length() const; |
77 | |
78 | int32_t codePointCount() const; |
79 | |
80 | inline char16_t charAt(int32_t index) const { |
81 | U_ASSERT(index >= 0); |
82 | U_ASSERT(index < fLength); |
83 | return getCharPtr()[fZero + index]; |
84 | } |
85 | |
86 | inline Field fieldAt(int32_t index) const { |
87 | U_ASSERT(index >= 0); |
88 | U_ASSERT(index < fLength); |
89 | return getFieldPtr()[fZero + index]; |
90 | } |
91 | |
92 | UChar32 getFirstCodePoint() const; |
93 | |
94 | UChar32 getLastCodePoint() const; |
95 | |
96 | UChar32 codePointAt(int32_t index) const; |
97 | |
98 | UChar32 codePointBefore(int32_t index) const; |
99 | |
100 | FormattedStringBuilder &clear(); |
101 | |
102 | /** Appends a UTF-16 code unit. */ |
103 | inline int32_t appendChar16(char16_t codeUnit, Field field, UErrorCode& status) { |
104 | // appendCodePoint handles both code units and code points. |
105 | return insertCodePoint(fLength, codeUnit, field, status); |
106 | } |
107 | |
108 | /** Inserts a UTF-16 code unit. Note: insert at index 0 is very efficient. */ |
109 | inline int32_t insertChar16(int32_t index, char16_t codeUnit, Field field, UErrorCode& status) { |
110 | // insertCodePoint handles both code units and code points. |
111 | return insertCodePoint(index, codeUnit, field, status); |
112 | } |
113 | |
114 | /** Appends a Unicode code point. */ |
115 | inline int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) { |
116 | return insertCodePoint(fLength, codePoint, field, status); |
117 | } |
118 | |
119 | /** Inserts a Unicode code point. Note: insert at index 0 is very efficient. */ |
120 | int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status); |
121 | |
122 | /** Appends a string. */ |
123 | inline int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status) { |
124 | return insert(fLength, unistr, field, status); |
125 | } |
126 | |
127 | /** Inserts a string. Note: insert at index 0 is very efficient. */ |
128 | int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status); |
129 | |
130 | /** Inserts a substring. Note: insert at index 0 is very efficient. |
131 | * |
132 | * @param start Start index of the substring of unistr to be inserted. |
133 | * @param end End index of the substring of unistr to be inserted (exclusive). |
134 | */ |
135 | int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field, |
136 | UErrorCode &status); |
137 | |
138 | /** Deletes a substring and then inserts a string at that same position. |
139 | * Similar to JavaScript Array.prototype.splice(). |
140 | * |
141 | * @param startThis Start of the span to delete. |
142 | * @param endThis End of the span to delete (exclusive). |
143 | * @param unistr The string to insert at the deletion position. |
144 | * @param startOther Start index of the substring of unistr to be inserted. |
145 | * @param endOther End index of the substring of unistr to be inserted (exclusive). |
146 | */ |
147 | int32_t splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, |
148 | int32_t startOther, int32_t endOther, Field field, UErrorCode& status); |
149 | |
150 | /** Appends a formatted string. */ |
151 | int32_t append(const FormattedStringBuilder &other, UErrorCode &status); |
152 | |
153 | /** Inserts a formatted string. Note: insert at index 0 is very efficient. */ |
154 | int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status); |
155 | |
156 | /** |
157 | * Ensures that the string buffer contains a NUL terminator. The NUL terminator does |
158 | * not count toward the string length. Any further changes to the string (insert or |
159 | * append) may invalidate the NUL terminator. |
160 | * |
161 | * You should call this method after the formatted string is completely built if you |
162 | * plan to return a pointer to the string from a C API. |
163 | */ |
164 | void writeTerminator(UErrorCode& status); |
165 | |
166 | /** |
167 | * Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed. |
168 | */ |
169 | UnicodeString toUnicodeString() const; |
170 | |
171 | /** |
172 | * Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and |
173 | * unchanged. Slightly faster than toUnicodeString(). |
174 | */ |
175 | const UnicodeString toTempUnicodeString() const; |
176 | |
177 | UnicodeString toDebugString() const; |
178 | |
179 | const char16_t *chars() const; |
180 | |
181 | bool contentEquals(const FormattedStringBuilder &other) const; |
182 | |
183 | bool containsField(Field field) const; |
184 | |
185 | private: |
186 | bool fUsingHeap = false; |
187 | ValueOrHeapArray<char16_t> fChars; |
188 | ValueOrHeapArray<Field> fFields; |
189 | int32_t fZero = DEFAULT_CAPACITY / 2; |
190 | int32_t fLength = 0; |
191 | |
192 | inline char16_t *getCharPtr() { |
193 | return fUsingHeap ? fChars.heap.ptr : fChars.value; |
194 | } |
195 | |
196 | inline const char16_t *getCharPtr() const { |
197 | return fUsingHeap ? fChars.heap.ptr : fChars.value; |
198 | } |
199 | |
200 | inline Field *getFieldPtr() { |
201 | return fUsingHeap ? fFields.heap.ptr : fFields.value; |
202 | } |
203 | |
204 | inline const Field *getFieldPtr() const { |
205 | return fUsingHeap ? fFields.heap.ptr : fFields.value; |
206 | } |
207 | |
208 | inline int32_t getCapacity() const { |
209 | return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY; |
210 | } |
211 | |
212 | int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status); |
213 | |
214 | int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status); |
215 | |
216 | int32_t remove(int32_t index, int32_t count); |
217 | |
218 | friend class FormattedValueStringBuilderImpl; |
219 | }; |
220 | |
221 | static_assert( |
222 | std::is_pod<FormattedStringBuilder::Field>::value, |
223 | "Field should be a POD type for efficient initialization" ); |
224 | |
225 | constexpr FormattedStringBuilder::Field::Field(uint8_t category, uint8_t field) |
226 | : bits(( |
227 | #if 0 // /ICU-21081 |
228 | U_ASSERT(category <= 0xf), |
229 | U_ASSERT(field <= 0xf), |
230 | #endif |
231 | static_cast<uint8_t>((category << 4) | field) |
232 | )) {} |
233 | |
234 | /** |
235 | * Internal constant for the undefined field for use in FormattedStringBuilder. |
236 | */ |
237 | constexpr FormattedStringBuilder::Field kUndefinedField = {UFIELD_CATEGORY_UNDEFINED, 0}; |
238 | |
239 | /** |
240 | * Internal field to signal "numeric" when fields are not supported in NumberFormat. |
241 | */ |
242 | constexpr FormattedStringBuilder::Field kGeneralNumericField = {UFIELD_CATEGORY_UNDEFINED, 1}; |
243 | |
244 | inline UFieldCategory FormattedStringBuilder::Field::getCategory() const { |
245 | return static_cast<UFieldCategory>(bits >> 4); |
246 | } |
247 | |
248 | inline int32_t FormattedStringBuilder::Field::getField() const { |
249 | return bits & 0xf; |
250 | } |
251 | |
252 | inline bool FormattedStringBuilder::Field::isNumeric() const { |
253 | return getCategory() == UFIELD_CATEGORY_NUMBER || *this == kGeneralNumericField; |
254 | } |
255 | |
256 | inline bool FormattedStringBuilder::Field::isUndefined() const { |
257 | return getCategory() == UFIELD_CATEGORY_UNDEFINED; |
258 | } |
259 | |
260 | inline bool FormattedStringBuilder::Field::operator==(const Field& other) const { |
261 | return bits == other.bits; |
262 | } |
263 | |
264 | inline bool FormattedStringBuilder::Field::operator!=(const Field& other) const { |
265 | return bits != other.bits; |
266 | } |
267 | |
268 | U_NAMESPACE_END |
269 | |
270 | |
271 | #endif //__NUMBER_STRINGBUILDER_H__ |
272 | |
273 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
274 | |