1 | // © 2018 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | // This file contains utilities to deal with static-allocated UnicodeSets. |
5 | // |
6 | // Common use case: you write a "private static final" UnicodeSet in Java, and |
7 | // want something similarly easy in C++. Originally written for number |
8 | // parsing, but this header can be used for other applications. |
9 | // |
10 | // Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)` |
11 | // |
12 | // This file is in common instead of i18n because it is needed by ucurr.cpp. |
13 | // |
14 | // Author: sffc |
15 | |
16 | #include "unicode/utypes.h" |
17 | |
18 | #if !UCONFIG_NO_FORMATTING |
19 | #ifndef __STATIC_UNICODE_SETS_H__ |
20 | #define __STATIC_UNICODE_SETS_H__ |
21 | |
22 | #include "unicode/uniset.h" |
23 | #include "unicode/unistr.h" |
24 | |
25 | U_NAMESPACE_BEGIN |
26 | namespace unisets { |
27 | |
// Identifiers for the static UnicodeSets that can be requested in this
// namespace. Every enumerator after EMPTY takes the next consecutive value, so
// the declaration order below fixes each key's numeric value.
enum Key {
    // Sentinels:
    // - NONE indicates null in chooseFrom().
    // - EMPTY is the key used to get an empty UnicodeSet.
    NONE = -1,
    EMPTY = 0,

    // Ignorables
    DEFAULT_IGNORABLES,
    STRICT_IGNORABLES,

    // Separators
    // How these sets relate to each other:
    // - COMMA is a superset of STRICT_COMMA
    // - PERIOD is a superset of STRICT_PERIOD
    // - ALL_SEPARATORS is the union of COMMA, PERIOD, and
    //   OTHER_GROUPING_SEPARATORS
    // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and
    //   OTHER_GROUPING_SEPARATORS
    COMMA,
    PERIOD,
    STRICT_COMMA,
    STRICT_PERIOD,
    APOSTROPHE_SIGN,
    OTHER_GROUPING_SEPARATORS,
    ALL_SEPARATORS,
    STRICT_ALL_SEPARATORS,

    // Symbols
    MINUS_SIGN,
    PLUS_SIGN,
    PERCENT_SIGN,
    PERMILLE_SIGN,
    INFINITY_SIGN,

    // Currency Symbols
    DOLLAR_SIGN,
    POUND_SIGN,
    RUPEE_SIGN,
    YEN_SIGN,
    WON_SIGN,

    // Other
    DIGITS,

    // Combined Separators with Digits (for lead code points)
    DIGITS_OR_ALL_SEPARATORS,
    DIGITS_OR_STRICT_ALL_SEPARATORS,

    // Number of keys counted from EMPTY onward (NONE is not included).
    // This entry must remain the last one in the enum.
    UNISETS_KEY_COUNT
};
77 | |
78 | /** |
79 | * Gets the static-allocated UnicodeSet according to the provided key. The |
80 | * pointer will be deleted during u_cleanup(); the caller should NOT delete it. |
81 | * |
82 | * Exported as U_COMMON_API for ucurr.cpp |
83 | * |
84 | * This method is always safe and OK to chain: in the case of a memory or other |
85 | * error, it returns an empty set from static memory. |
86 | * |
87 | * Example: |
88 | * |
89 | * UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...); |
90 | * |
91 | * @param key The desired UnicodeSet according to the enum in this file. |
92 | * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but |
93 | * may be empty if an error occurred during data loading. |
94 | */ |
95 | U_COMMON_API const UnicodeSet* get(Key key); |
96 | |
97 | /** |
98 | * Checks if the UnicodeSet given by key1 contains the given string. |
99 | * |
100 | * Exported as U_COMMON_API for numparse_decimal.cpp |
101 | * |
102 | * @param str The string to check. |
103 | * @param key1 The set to check. |
104 | * @return key1 if the set contains str, or NONE if not. |
105 | */ |
106 | U_COMMON_API Key chooseFrom(UnicodeString str, Key key1); |
107 | |
108 | /** |
109 | * Checks if the UnicodeSet given by either key1 or key2 contains the string. |
110 | * |
111 | * Exported as U_COMMON_API for numparse_decimal.cpp |
112 | * |
113 | * @param str The string to check. |
114 | * @param key1 The first set to check. |
115 | * @param key2 The second set to check. |
116 | * @return key1 if that set contains str; key2 if that set contains str; or |
117 | * NONE if neither set contains str. |
118 | */ |
119 | U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2); |
120 | |
121 | // TODO: Load these from data: ICU-20108 |
122 | // Unused in C++: |
123 | // Key chooseCurrency(UnicodeString str); |
124 | // Used instead: |
125 | static const struct { |
126 | Key key; |
127 | UChar32 exemplar; |
128 | } kCurrencyEntries[] = { |
129 | {DOLLAR_SIGN, u'$'}, |
130 | {POUND_SIGN, u'£'}, |
131 | {RUPEE_SIGN, u'₹'}, |
132 | {YEN_SIGN, u'¥'}, |
133 | {WON_SIGN, u'₩'}, |
134 | }; |
135 | |
136 | } // namespace unisets |
137 | U_NAMESPACE_END |
138 | |
139 | #endif //__STATIC_UNICODE_SETS_H__ |
140 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
141 | |