1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | #include "unicode/utypes.h" |
5 | |
6 | #if !UCONFIG_NO_FORMATTING |
7 | #ifndef __NUMBER_STRINGBUILDER_H__ |
8 | #define __NUMBER_STRINGBUILDER_H__ |
9 | |
10 | |
11 | #include <cstdint> |
12 | #include "unicode/unum.h" // for UNUM_FIELD_COUNT |
13 | #include "cstring.h" |
14 | #include "uassert.h" |
15 | #include "fphdlimp.h" |
16 | |
17 | U_NAMESPACE_BEGIN |
18 | |
19 | class FormattedValueStringBuilderImpl; |
20 | |
21 | /** |
22 | * A StringBuilder optimized for formatting. It implements the following key |
23 | * features beyond a UnicodeString: |
24 | * |
25 | * <ol> |
26 | * <li>Efficient prepend as well as append. |
27 | * <li>Keeps tracks of Fields in an efficient manner. |
28 | * </ol> |
29 | * |
30 | * See also FormattedValueStringBuilderImpl. |
31 | * |
32 | * @author sffc (Shane Carr) |
33 | */ |
34 | class U_I18N_API FormattedStringBuilder : public UMemory { |
35 | private: |
36 | static const int32_t DEFAULT_CAPACITY = 40; |
37 | |
38 | template<typename T> |
39 | union ValueOrHeapArray { |
40 | T value[DEFAULT_CAPACITY]; |
41 | struct { |
42 | T *ptr; |
43 | int32_t capacity; |
44 | } heap; |
45 | }; |
46 | |
47 | public: |
48 | FormattedStringBuilder(); |
49 | |
50 | ~FormattedStringBuilder(); |
51 | |
52 | FormattedStringBuilder(const FormattedStringBuilder &other); |
53 | |
54 | // Convention: bottom 4 bits for field, top 4 bits for field category. |
55 | // Field category 0 implies the number category so that the number field |
56 | // literals can be directly passed as a Field type. |
57 | // See the helper functions in "StringBuilderFieldUtils" below. |
58 | typedef uint8_t Field; |
59 | |
60 | FormattedStringBuilder &operator=(const FormattedStringBuilder &other); |
61 | |
62 | int32_t length() const; |
63 | |
64 | int32_t codePointCount() const; |
65 | |
66 | inline char16_t charAt(int32_t index) const { |
67 | U_ASSERT(index >= 0); |
68 | U_ASSERT(index < fLength); |
69 | return getCharPtr()[fZero + index]; |
70 | } |
71 | |
72 | inline Field fieldAt(int32_t index) const { |
73 | U_ASSERT(index >= 0); |
74 | U_ASSERT(index < fLength); |
75 | return getFieldPtr()[fZero + index]; |
76 | } |
77 | |
78 | UChar32 getFirstCodePoint() const; |
79 | |
80 | UChar32 getLastCodePoint() const; |
81 | |
82 | UChar32 codePointAt(int32_t index) const; |
83 | |
84 | UChar32 codePointBefore(int32_t index) const; |
85 | |
86 | FormattedStringBuilder &clear(); |
87 | |
88 | /** Appends a UTF-16 code unit. */ |
89 | inline int32_t appendChar16(char16_t codeUnit, Field field, UErrorCode& status) { |
90 | // appendCodePoint handles both code units and code points. |
91 | return insertCodePoint(fLength, codeUnit, field, status); |
92 | } |
93 | |
94 | /** Inserts a UTF-16 code unit. Note: insert at index 0 is very efficient. */ |
95 | inline int32_t insertChar16(int32_t index, char16_t codeUnit, Field field, UErrorCode& status) { |
96 | // insertCodePoint handles both code units and code points. |
97 | return insertCodePoint(index, codeUnit, field, status); |
98 | } |
99 | |
100 | /** Appends a Unicode code point. */ |
101 | inline int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) { |
102 | return insertCodePoint(fLength, codePoint, field, status); |
103 | } |
104 | |
105 | /** Inserts a Unicode code point. Note: insert at index 0 is very efficient. */ |
106 | int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status); |
107 | |
108 | /** Appends a string. */ |
109 | inline int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status) { |
110 | return insert(fLength, unistr, field, status); |
111 | } |
112 | |
113 | /** Inserts a string. Note: insert at index 0 is very efficient. */ |
114 | int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status); |
115 | |
116 | /** Inserts a substring. Note: insert at index 0 is very efficient. |
117 | * |
118 | * @param start Start index of the substring of unistr to be inserted. |
119 | * @param end End index of the substring of unistr to be inserted (exclusive). |
120 | */ |
121 | int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field, |
122 | UErrorCode &status); |
123 | |
124 | /** Deletes a substring and then inserts a string at that same position. |
125 | * Similar to JavaScript Array.prototype.splice(). |
126 | * |
127 | * @param startThis Start of the span to delete. |
128 | * @param endThis End of the span to delete (exclusive). |
129 | * @param unistr The string to insert at the deletion position. |
130 | * @param startOther Start index of the substring of unistr to be inserted. |
131 | * @param endOther End index of the substring of unistr to be inserted (exclusive). |
132 | */ |
133 | int32_t splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, |
134 | int32_t startOther, int32_t endOther, Field field, UErrorCode& status); |
135 | |
136 | /** Appends a formatted string. */ |
137 | int32_t append(const FormattedStringBuilder &other, UErrorCode &status); |
138 | |
139 | /** Inserts a formatted string. Note: insert at index 0 is very efficient. */ |
140 | int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status); |
141 | |
142 | /** |
143 | * Ensures that the string buffer contains a NUL terminator. The NUL terminator does |
144 | * not count toward the string length. Any further changes to the string (insert or |
145 | * append) may invalidate the NUL terminator. |
146 | * |
147 | * You should call this method after the formatted string is completely built if you |
148 | * plan to return a pointer to the string from a C API. |
149 | */ |
150 | void writeTerminator(UErrorCode& status); |
151 | |
152 | /** |
153 | * Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed. |
154 | */ |
155 | UnicodeString toUnicodeString() const; |
156 | |
157 | /** |
158 | * Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and |
159 | * unchanged. Slightly faster than toUnicodeString(). |
160 | */ |
161 | const UnicodeString toTempUnicodeString() const; |
162 | |
163 | UnicodeString toDebugString() const; |
164 | |
165 | const char16_t *chars() const; |
166 | |
167 | bool contentEquals(const FormattedStringBuilder &other) const; |
168 | |
169 | bool containsField(Field field) const; |
170 | |
171 | private: |
172 | bool fUsingHeap = false; |
173 | ValueOrHeapArray<char16_t> fChars; |
174 | ValueOrHeapArray<Field> fFields; |
175 | int32_t fZero = DEFAULT_CAPACITY / 2; |
176 | int32_t fLength = 0; |
177 | |
178 | inline char16_t *getCharPtr() { |
179 | return fUsingHeap ? fChars.heap.ptr : fChars.value; |
180 | } |
181 | |
182 | inline const char16_t *getCharPtr() const { |
183 | return fUsingHeap ? fChars.heap.ptr : fChars.value; |
184 | } |
185 | |
186 | inline Field *getFieldPtr() { |
187 | return fUsingHeap ? fFields.heap.ptr : fFields.value; |
188 | } |
189 | |
190 | inline const Field *getFieldPtr() const { |
191 | return fUsingHeap ? fFields.heap.ptr : fFields.value; |
192 | } |
193 | |
194 | inline int32_t getCapacity() const { |
195 | return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY; |
196 | } |
197 | |
198 | int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status); |
199 | |
200 | int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status); |
201 | |
202 | int32_t remove(int32_t index, int32_t count); |
203 | |
204 | friend class FormattedValueStringBuilderImpl; |
205 | }; |
206 | |
207 | /** |
208 | * Helper functions for dealing with the Field typedef, which stores fields |
209 | * in a compressed format. |
210 | */ |
211 | class StringBuilderFieldUtils { |
212 | public: |
213 | struct CategoryFieldPair { |
214 | int32_t category; |
215 | int32_t field; |
216 | }; |
217 | |
218 | /** Compile-time function to construct a Field from a category and a field */ |
219 | template <int32_t category, int32_t field> |
220 | static constexpr FormattedStringBuilder::Field compress() { |
221 | static_assert(category != 0, "cannot use Undefined category in FieldUtils" ); |
222 | static_assert(category <= 0xf, "only 4 bits for category" ); |
223 | static_assert(field <= 0xf, "only 4 bits for field" ); |
224 | return static_cast<int8_t>((category << 4) | field); |
225 | } |
226 | |
227 | /** Runtime inline function to unpack the category and field from the Field */ |
228 | static inline CategoryFieldPair expand(FormattedStringBuilder::Field field) { |
229 | if (field == UNUM_FIELD_COUNT) { |
230 | return {UFIELD_CATEGORY_UNDEFINED, 0}; |
231 | } |
232 | CategoryFieldPair ret = { |
233 | (field >> 4), |
234 | (field & 0xf) |
235 | }; |
236 | if (ret.category == 0) { |
237 | ret.category = UFIELD_CATEGORY_NUMBER; |
238 | } |
239 | return ret; |
240 | } |
241 | |
242 | static inline bool isNumericField(FormattedStringBuilder::Field field) { |
243 | int8_t category = field >> 4; |
244 | return category == 0 || category == UFIELD_CATEGORY_NUMBER; |
245 | } |
246 | }; |
247 | |
248 | U_NAMESPACE_END |
249 | |
250 | |
251 | #endif //__NUMBER_STRINGBUILDER_H__ |
252 | |
253 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
254 | |