1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5* Copyright (C) 2014-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7******************************************************************************
8* simpleformatter.h
9*/
10
11#ifndef __SIMPLEFORMATTER_H__
12#define __SIMPLEFORMATTER_H__
13
14/**
15 * \file
16 * \brief C++ API: Simple formatter, minimal subset of MessageFormat.
17 */
18
19#include "unicode/utypes.h"
20
21#if U_SHOW_CPLUSPLUS_API
22
23#include "unicode/unistr.h"
24
25U_NAMESPACE_BEGIN
26
// Forward declaration only: number::impl::SimpleModifier is named as a friend
// of SimpleFormatter below, so the type must be visible here without
// including its defining header.
namespace number { namespace impl { class SimpleModifier; } }
33
34/**
35 * Formats simple patterns like "{1} was born in {0}".
36 * Minimal subset of MessageFormat; fast, simple, minimal dependencies.
37 * Supports only numbered arguments with no type nor style parameters,
38 * and formats only string values.
39 * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior.
40 *
41 * Factory methods set error codes for syntax errors
42 * and for too few or too many arguments/placeholders.
43 *
44 * SimpleFormatter objects are thread-safe except for assignment and applying new patterns.
45 *
46 * Example:
47 * <pre>
48 * UErrorCode errorCode = U_ZERO_ERROR;
49 * SimpleFormatter fmt("{1} '{born}' in {0}", errorCode);
50 * UnicodeString result;
51 *
52 * // Output: "paul {born} in england"
53 * fmt.format("england", "paul", result, errorCode);
54 * </pre>
55 *
56 * This class is not intended for public subclassing.
57 *
58 * @see MessageFormat
59 * @see UMessagePatternApostropheMode
60 * @stable ICU 57
61 */
62class U_COMMON_API SimpleFormatter final : public UMemory {
63public:
64 /**
65 * Default constructor.
66 * @stable ICU 57
67 */
68 SimpleFormatter() : compiledPattern((char16_t)0) {}
69
70 /**
71 * Constructs a formatter from the pattern string.
72 *
73 * @param pattern The pattern string.
74 * @param errorCode ICU error code in/out parameter.
75 * Must fulfill U_SUCCESS before the function call.
76 * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
77 * @stable ICU 57
78 */
79 SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) {
80 applyPattern(pattern, errorCode);
81 }
82
83 /**
84 * Constructs a formatter from the pattern string.
85 * The number of arguments checked against the given limits is the
86 * highest argument number plus one, not the number of occurrences of arguments.
87 *
88 * @param pattern The pattern string.
89 * @param min The pattern must have at least this many arguments.
90 * @param max The pattern must have at most this many arguments.
91 * @param errorCode ICU error code in/out parameter.
92 * Must fulfill U_SUCCESS before the function call.
93 * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
94 * too few or too many arguments.
95 * @stable ICU 57
96 */
97 SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max,
98 UErrorCode &errorCode) {
99 applyPatternMinMaxArguments(pattern, min, max, errorCode);
100 }
101
102 /**
103 * Copy constructor.
104 * @stable ICU 57
105 */
106 SimpleFormatter(const SimpleFormatter& other)
107 : compiledPattern(other.compiledPattern) {}
108
109 /**
110 * Assignment operator.
111 * @stable ICU 57
112 */
113 SimpleFormatter &operator=(const SimpleFormatter& other);
114
115 /**
116 * Destructor.
117 * @stable ICU 57
118 */
119 ~SimpleFormatter();
120
121 /**
122 * Changes this object according to the new pattern.
123 *
124 * @param pattern The pattern string.
125 * @param errorCode ICU error code in/out parameter.
126 * Must fulfill U_SUCCESS before the function call.
127 * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
128 * @return true if U_SUCCESS(errorCode).
129 * @stable ICU 57
130 */
131 UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) {
132 return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode);
133 }
134
135 /**
136 * Changes this object according to the new pattern.
137 * The number of arguments checked against the given limits is the
138 * highest argument number plus one, not the number of occurrences of arguments.
139 *
140 * @param pattern The pattern string.
141 * @param min The pattern must have at least this many arguments.
142 * @param max The pattern must have at most this many arguments.
143 * @param errorCode ICU error code in/out parameter.
144 * Must fulfill U_SUCCESS before the function call.
145 * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
146 * too few or too many arguments.
147 * @return true if U_SUCCESS(errorCode).
148 * @stable ICU 57
149 */
150 UBool applyPatternMinMaxArguments(const UnicodeString &pattern,
151 int32_t min, int32_t max, UErrorCode &errorCode);
152
153 /**
154 * @return The max argument number + 1.
155 * @stable ICU 57
156 */
157 int32_t getArgumentLimit() const {
158 return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length());
159 }
160
161 /**
162 * Formats the given value, appending to the appendTo builder.
163 * The argument value must not be the same object as appendTo.
164 * getArgumentLimit() must be at most 1.
165 *
166 * @param value0 Value for argument {0}.
167 * @param appendTo Gets the formatted pattern and value appended.
168 * @param errorCode ICU error code in/out parameter.
169 * Must fulfill U_SUCCESS before the function call.
170 * @return appendTo
171 * @stable ICU 57
172 */
173 UnicodeString &format(
174 const UnicodeString &value0,
175 UnicodeString &appendTo, UErrorCode &errorCode) const;
176
177 /**
178 * Formats the given values, appending to the appendTo builder.
179 * An argument value must not be the same object as appendTo.
180 * getArgumentLimit() must be at most 2.
181 *
182 * @param value0 Value for argument {0}.
183 * @param value1 Value for argument {1}.
184 * @param appendTo Gets the formatted pattern and values appended.
185 * @param errorCode ICU error code in/out parameter.
186 * Must fulfill U_SUCCESS before the function call.
187 * @return appendTo
188 * @stable ICU 57
189 */
190 UnicodeString &format(
191 const UnicodeString &value0,
192 const UnicodeString &value1,
193 UnicodeString &appendTo, UErrorCode &errorCode) const;
194
195 /**
196 * Formats the given values, appending to the appendTo builder.
197 * An argument value must not be the same object as appendTo.
198 * getArgumentLimit() must be at most 3.
199 *
200 * @param value0 Value for argument {0}.
201 * @param value1 Value for argument {1}.
202 * @param value2 Value for argument {2}.
203 * @param appendTo Gets the formatted pattern and values appended.
204 * @param errorCode ICU error code in/out parameter.
205 * Must fulfill U_SUCCESS before the function call.
206 * @return appendTo
207 * @stable ICU 57
208 */
209 UnicodeString &format(
210 const UnicodeString &value0,
211 const UnicodeString &value1,
212 const UnicodeString &value2,
213 UnicodeString &appendTo, UErrorCode &errorCode) const;
214
215 /**
216 * Formats the given values, appending to the appendTo string.
217 *
218 * @param values The argument values.
219 * An argument value must not be the same object as appendTo.
220 * Can be nullptr if valuesLength==getArgumentLimit()==0.
221 * @param valuesLength The length of the values array.
222 * Must be at least getArgumentLimit().
223 * @param appendTo Gets the formatted pattern and values appended.
224 * @param offsets offsets[i] receives the offset of where
225 * values[i] replaced pattern argument {i}.
226 * Can be shorter or longer than values. Can be nullptr if offsetsLength==0.
227 * If there is no {i} in the pattern, then offsets[i] is set to -1.
228 * @param offsetsLength The length of the offsets array.
229 * @param errorCode ICU error code in/out parameter.
230 * Must fulfill U_SUCCESS before the function call.
231 * @return appendTo
232 * @stable ICU 57
233 */
234 UnicodeString &formatAndAppend(
235 const UnicodeString *const *values, int32_t valuesLength,
236 UnicodeString &appendTo,
237 int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
238
239 /**
240 * Formats the given values, replacing the contents of the result string.
241 * May optimize by actually appending to the result if it is the same object
242 * as the value corresponding to the initial argument in the pattern.
243 *
244 * @param values The argument values.
245 * An argument value may be the same object as result.
246 * Can be nullptr if valuesLength==getArgumentLimit()==0.
247 * @param valuesLength The length of the values array.
248 * Must be at least getArgumentLimit().
249 * @param result Gets its contents replaced by the formatted pattern and values.
250 * @param offsets offsets[i] receives the offset of where
251 * values[i] replaced pattern argument {i}.
252 * Can be shorter or longer than values. Can be nullptr if offsetsLength==0.
253 * If there is no {i} in the pattern, then offsets[i] is set to -1.
254 * @param offsetsLength The length of the offsets array.
255 * @param errorCode ICU error code in/out parameter.
256 * Must fulfill U_SUCCESS before the function call.
257 * @return result
258 * @stable ICU 57
259 */
260 UnicodeString &formatAndReplace(
261 const UnicodeString *const *values, int32_t valuesLength,
262 UnicodeString &result,
263 int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
264
265 /**
266 * Returns the pattern text with none of the arguments.
267 * Like formatting with all-empty string values.
268 * @stable ICU 57
269 */
270 UnicodeString getTextWithNoArguments() const {
271 return getTextWithNoArguments(
272 compiledPattern.getBuffer(),
273 compiledPattern.length(),
274 nullptr,
275 0);
276 }
277
278#ifndef U_HIDE_INTERNAL_API
279 /**
280 * Returns the pattern text with none of the arguments.
281 * Like formatting with all-empty string values.
282 *
283 * TODO(ICU-20406): Replace this with an Iterator interface.
284 *
285 * @param offsets offsets[i] receives the offset of where {i} was located
286 * before it was replaced by an empty string.
287 * For example, "a{0}b{1}" produces offset 1 for i=0 and 2 for i=1.
288 * Can be nullptr if offsetsLength==0.
289 * If there is no {i} in the pattern, then offsets[i] is set to -1.
290 * @param offsetsLength The length of the offsets array.
291 *
292 * @internal
293 */
294 UnicodeString getTextWithNoArguments(int32_t *offsets, int32_t offsetsLength) const {
295 return getTextWithNoArguments(
296 compiledPattern.getBuffer(),
297 compiledPattern.length(),
298 offsets,
299 offsetsLength);
300 }
301#endif // U_HIDE_INTERNAL_API
302
303private:
304 /**
305 * Binary representation of the compiled pattern.
306 * Index 0: One more than the highest argument number.
307 * Followed by zero or more arguments or literal-text segments.
308 *
309 * An argument is stored as its number, less than ARG_NUM_LIMIT.
310 * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
311 * followed by that many chars.
312 */
313 UnicodeString compiledPattern;
314
315 static inline int32_t getArgumentLimit(const char16_t *compiledPattern,
316 int32_t compiledPatternLength) {
317 return compiledPatternLength == 0 ? 0 : compiledPattern[0];
318 }
319
320 static UnicodeString getTextWithNoArguments(
321 const char16_t *compiledPattern,
322 int32_t compiledPatternLength,
323 int32_t *offsets,
324 int32_t offsetsLength);
325
326 static UnicodeString &format(
327 const char16_t *compiledPattern, int32_t compiledPatternLength,
328 const UnicodeString *const *values,
329 UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
330 int32_t *offsets, int32_t offsetsLength,
331 UErrorCode &errorCode);
332
333 // Give access to internals to SimpleModifier for number formatting
334 friend class number::impl::SimpleModifier;
335};
336
337U_NAMESPACE_END
338
339#endif /* U_SHOW_CPLUSPLUS_API */
340
341#endif // __SIMPLEFORMATTER_H__
342