1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7#ifndef __NUMBER_STRINGBUILDER_H__
8#define __NUMBER_STRINGBUILDER_H__
9
10
11#include <cstdint>
12#include "unicode/unum.h" // for UNUM_FIELD_COUNT
13#include "cstring.h"
14#include "uassert.h"
15#include "fphdlimp.h"
16
17U_NAMESPACE_BEGIN
18
19class FormattedValueStringBuilderImpl;
20
21/**
22 * A StringBuilder optimized for formatting. It implements the following key
23 * features beyond a UnicodeString:
24 *
25 * <ol>
26 * <li>Efficient prepend as well as append.
 * <li>Keeps track of Fields in an efficient manner.
28 * </ol>
29 *
30 * See also FormattedValueStringBuilderImpl.
31 *
32 * @author sffc (Shane Carr)
33 */
34class U_I18N_API FormattedStringBuilder : public UMemory {
35 private:
36 static const int32_t DEFAULT_CAPACITY = 40;
37
38 template<typename T>
39 union ValueOrHeapArray {
40 T value[DEFAULT_CAPACITY];
41 struct {
42 T *ptr;
43 int32_t capacity;
44 } heap;
45 };
46
47 public:
48 FormattedStringBuilder();
49
50 ~FormattedStringBuilder();
51
52 FormattedStringBuilder(const FormattedStringBuilder &other);
53
54 // Convention: bottom 4 bits for field, top 4 bits for field category.
55 // Field category 0 implies the number category so that the number field
56 // literals can be directly passed as a Field type.
57 // See the helper functions in "StringBuilderFieldUtils" below.
58 typedef uint8_t Field;
59
60 FormattedStringBuilder &operator=(const FormattedStringBuilder &other);
61
62 int32_t length() const;
63
64 int32_t codePointCount() const;
65
66 inline char16_t charAt(int32_t index) const {
67 U_ASSERT(index >= 0);
68 U_ASSERT(index < fLength);
69 return getCharPtr()[fZero + index];
70 }
71
72 inline Field fieldAt(int32_t index) const {
73 U_ASSERT(index >= 0);
74 U_ASSERT(index < fLength);
75 return getFieldPtr()[fZero + index];
76 }
77
78 UChar32 getFirstCodePoint() const;
79
80 UChar32 getLastCodePoint() const;
81
82 UChar32 codePointAt(int32_t index) const;
83
84 UChar32 codePointBefore(int32_t index) const;
85
86 FormattedStringBuilder &clear();
87
88 /** Appends a UTF-16 code unit. */
89 inline int32_t appendChar16(char16_t codeUnit, Field field, UErrorCode& status) {
90 // appendCodePoint handles both code units and code points.
91 return insertCodePoint(fLength, codeUnit, field, status);
92 }
93
94 /** Inserts a UTF-16 code unit. Note: insert at index 0 is very efficient. */
95 inline int32_t insertChar16(int32_t index, char16_t codeUnit, Field field, UErrorCode& status) {
96 // insertCodePoint handles both code units and code points.
97 return insertCodePoint(index, codeUnit, field, status);
98 }
99
100 /** Appends a Unicode code point. */
101 inline int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
102 return insertCodePoint(fLength, codePoint, field, status);
103 }
104
105 /** Inserts a Unicode code point. Note: insert at index 0 is very efficient. */
106 int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status);
107
108 /** Appends a string. */
109 inline int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status) {
110 return insert(fLength, unistr, field, status);
111 }
112
113 /** Inserts a string. Note: insert at index 0 is very efficient. */
114 int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status);
115
116 /** Inserts a substring. Note: insert at index 0 is very efficient.
117 *
118 * @param start Start index of the substring of unistr to be inserted.
119 * @param end End index of the substring of unistr to be inserted (exclusive).
120 */
121 int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field,
122 UErrorCode &status);
123
124 /** Deletes a substring and then inserts a string at that same position.
125 * Similar to JavaScript Array.prototype.splice().
126 *
127 * @param startThis Start of the span to delete.
128 * @param endThis End of the span to delete (exclusive).
129 * @param unistr The string to insert at the deletion position.
130 * @param startOther Start index of the substring of unistr to be inserted.
131 * @param endOther End index of the substring of unistr to be inserted (exclusive).
132 */
133 int32_t splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
134 int32_t startOther, int32_t endOther, Field field, UErrorCode& status);
135
136 /** Appends a formatted string. */
137 int32_t append(const FormattedStringBuilder &other, UErrorCode &status);
138
139 /** Inserts a formatted string. Note: insert at index 0 is very efficient. */
140 int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status);
141
142 /**
143 * Ensures that the string buffer contains a NUL terminator. The NUL terminator does
144 * not count toward the string length. Any further changes to the string (insert or
145 * append) may invalidate the NUL terminator.
146 *
147 * You should call this method after the formatted string is completely built if you
148 * plan to return a pointer to the string from a C API.
149 */
150 void writeTerminator(UErrorCode& status);
151
152 /**
153 * Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed.
154 */
155 UnicodeString toUnicodeString() const;
156
157 /**
158 * Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and
159 * unchanged. Slightly faster than toUnicodeString().
160 */
161 const UnicodeString toTempUnicodeString() const;
162
163 UnicodeString toDebugString() const;
164
165 const char16_t *chars() const;
166
167 bool contentEquals(const FormattedStringBuilder &other) const;
168
169 bool containsField(Field field) const;
170
171 private:
172 bool fUsingHeap = false;
173 ValueOrHeapArray<char16_t> fChars;
174 ValueOrHeapArray<Field> fFields;
175 int32_t fZero = DEFAULT_CAPACITY / 2;
176 int32_t fLength = 0;
177
178 inline char16_t *getCharPtr() {
179 return fUsingHeap ? fChars.heap.ptr : fChars.value;
180 }
181
182 inline const char16_t *getCharPtr() const {
183 return fUsingHeap ? fChars.heap.ptr : fChars.value;
184 }
185
186 inline Field *getFieldPtr() {
187 return fUsingHeap ? fFields.heap.ptr : fFields.value;
188 }
189
190 inline const Field *getFieldPtr() const {
191 return fUsingHeap ? fFields.heap.ptr : fFields.value;
192 }
193
194 inline int32_t getCapacity() const {
195 return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY;
196 }
197
198 int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status);
199
200 int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status);
201
202 int32_t remove(int32_t index, int32_t count);
203
204 friend class FormattedValueStringBuilderImpl;
205};
206
207/**
208 * Helper functions for dealing with the Field typedef, which stores fields
209 * in a compressed format.
210 */
211class StringBuilderFieldUtils {
212public:
213 struct CategoryFieldPair {
214 int32_t category;
215 int32_t field;
216 };
217
218 /** Compile-time function to construct a Field from a category and a field */
219 template <int32_t category, int32_t field>
220 static constexpr FormattedStringBuilder::Field compress() {
221 static_assert(category != 0, "cannot use Undefined category in FieldUtils");
222 static_assert(category <= 0xf, "only 4 bits for category");
223 static_assert(field <= 0xf, "only 4 bits for field");
224 return static_cast<int8_t>((category << 4) | field);
225 }
226
227 /** Runtime inline function to unpack the category and field from the Field */
228 static inline CategoryFieldPair expand(FormattedStringBuilder::Field field) {
229 if (field == UNUM_FIELD_COUNT) {
230 return {UFIELD_CATEGORY_UNDEFINED, 0};
231 }
232 CategoryFieldPair ret = {
233 (field >> 4),
234 (field & 0xf)
235 };
236 if (ret.category == 0) {
237 ret.category = UFIELD_CATEGORY_NUMBER;
238 }
239 return ret;
240 }
241
242 static inline bool isNumericField(FormattedStringBuilder::Field field) {
243 int8_t category = field >> 4;
244 return category == 0 || category == UFIELD_CATEGORY_NUMBER;
245 }
246};
247
248U_NAMESPACE_END
249
250
251#endif //__NUMBER_STRINGBUILDER_H__
252
253#endif /* #if !UCONFIG_NO_FORMATTING */
254