1// © 2018 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
// This file contains utilities to deal with statically allocated UnicodeSets.
//
// Common use case: you write a "private static final" UnicodeSet in Java, and
// want something similarly easy in C++. Originally written for number
// parsing, but this header can be used for other applications.
//
// Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)`
//
// This file is in common instead of i18n because it is needed by ucurr.cpp.
//
// Author: sffc
15
16#include "unicode/utypes.h"
17
18#if !UCONFIG_NO_FORMATTING
19#ifndef __STATIC_UNICODE_SETS_H__
20#define __STATIC_UNICODE_SETS_H__
21
22#include "unicode/uniset.h"
23#include "unicode/unistr.h"
24
25U_NAMESPACE_BEGIN
26namespace unisets {
27
// Identifiers for the statically allocated UnicodeSets exposed by this header.
// Pass one of these to get() to obtain the corresponding set.
enum Key {
    // NONE is used to indicate null in chooseFrom().
    // EMPTY is used to get an empty UnicodeSet.
    NONE = -1,
    EMPTY = 0,

    // Ignorables
    DEFAULT_IGNORABLES,
    STRICT_IGNORABLES,

    // Separators
    // Notes:
    // - COMMA is a superset of STRICT_COMMA
    // - PERIOD is a superset of STRICT_PERIOD
    // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
    // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and
    //   OTHER_GROUPING_SEPARATORS
    COMMA,
    PERIOD,
    STRICT_COMMA,
    STRICT_PERIOD,
    APOSTROPHE_SIGN,
    OTHER_GROUPING_SEPARATORS,
    ALL_SEPARATORS,
    STRICT_ALL_SEPARATORS,

    // Symbols
    MINUS_SIGN,
    PLUS_SIGN,
    PERCENT_SIGN,
    PERMILLE_SIGN,
    INFINITY_SIGN,

    // Currency Symbols
    DOLLAR_SIGN,
    POUND_SIGN,
    RUPEE_SIGN,
    YEN_SIGN,
    WON_SIGN,

    // Other
    DIGITS,

    // Combined Separators with Digits (for lead code points)
    DIGITS_OR_ALL_SEPARATORS,
    DIGITS_OR_STRICT_ALL_SEPARATORS,

    // The number of elements in the enum (excluding NONE, which is negative).
    // Must remain the last enumerator.
    UNISETS_KEY_COUNT
};
77
78/**
79 * Gets the static-allocated UnicodeSet according to the provided key. The
80 * pointer will be deleted during u_cleanup(); the caller should NOT delete it.
81 *
82 * Exported as U_COMMON_API for ucurr.cpp
83 *
84 * This method is always safe and OK to chain: in the case of a memory or other
85 * error, it returns an empty set from static memory.
86 *
87 * Example:
88 *
89 * UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...);
90 *
91 * @param key The desired UnicodeSet according to the enum in this file.
92 * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but
93 * may be empty if an error occurred during data loading.
94 */
95U_COMMON_API const UnicodeSet* get(Key key);
96
97/**
98 * Checks if the UnicodeSet given by key1 contains the given string.
99 *
100 * Exported as U_COMMON_API for numparse_decimal.cpp
101 *
102 * @param str The string to check.
103 * @param key1 The set to check.
104 * @return key1 if the set contains str, or NONE if not.
105 */
106U_COMMON_API Key chooseFrom(UnicodeString str, Key key1);
107
108/**
109 * Checks if the UnicodeSet given by either key1 or key2 contains the string.
110 *
111 * Exported as U_COMMON_API for numparse_decimal.cpp
112 *
113 * @param str The string to check.
114 * @param key1 The first set to check.
115 * @param key2 The second set to check.
116 * @return key1 if that set contains str; key2 if that set contains str; or
117 * NONE if neither set contains str.
118 */
119U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2);
120
121// TODO: Load these from data: ICU-20108
122// Unused in C++:
123// Key chooseCurrency(UnicodeString str);
124// Used instead:
125static const struct {
126 Key key;
127 UChar32 exemplar;
128} kCurrencyEntries[] = {
129 {DOLLAR_SIGN, u'$'},
130 {POUND_SIGN, u'£'},
131 {RUPEE_SIGN, u'₹'},
132 {YEN_SIGN, u'¥'},
133 {WON_SIGN, u'₩'},
134};
135
136} // namespace unisets
137U_NAMESPACE_END
138
139#endif //__STATIC_UNICODE_SETS_H__
140#endif /* #if !UCONFIG_NO_FORMATTING */
141