// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// This file contains utilities to deal with statically allocated UnicodeSets.
//
// Common use case: you write a "private static final" UnicodeSet in Java, and
// want something similarly easy in C++. Originally written for number
// parsing, but this header can be used for other applications.
//
// Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)`
//
// This file is in common instead of i18n because it is needed by ucurr.cpp.
//
// Author: sffc
| 15 | |
| 16 | #include "unicode/utypes.h" |
| 17 | |
| 18 | #if !UCONFIG_NO_FORMATTING |
| 19 | #ifndef __STATIC_UNICODE_SETS_H__ |
| 20 | #define __STATIC_UNICODE_SETS_H__ |
| 21 | |
| 22 | #include "unicode/uniset.h" |
| 23 | #include "unicode/unistr.h" |
| 24 | |
| 25 | U_NAMESPACE_BEGIN |
| 26 | namespace unisets { |
| 27 | |
/**
 * Identifies one of the static UnicodeSets that can be requested through
 * unisets::get().
 *
 * Numbering is implicit: EMPTY is 0 and every following enumerator is one more
 * than its predecessor. UNISETS_KEY_COUNT is therefore the number of keys from
 * EMPTY onward (NONE, which is -1, is not counted) and must remain the last
 * enumerator.
 */
enum Key {
    // NONE is used to indicate null in chooseFrom().
    // EMPTY is used to get an empty UnicodeSet.
    NONE = -1,
    EMPTY = 0,

    // Ignorables
    DEFAULT_IGNORABLES,
    STRICT_IGNORABLES,

    // Separators
    // Notes:
    // - COMMA is a superset of STRICT_COMMA
    // - PERIOD is a superset of STRICT_PERIOD
    // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
    // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and
    //   OTHER_GROUPING_SEPARATORS
    COMMA,
    PERIOD,
    STRICT_COMMA,
    STRICT_PERIOD,
    APOSTROPHE_SIGN,
    OTHER_GROUPING_SEPARATORS,
    ALL_SEPARATORS,
    STRICT_ALL_SEPARATORS,

    // Symbols
    MINUS_SIGN,
    PLUS_SIGN,
    PERCENT_SIGN,
    PERMILLE_SIGN,
    INFINITY_SIGN,

    // Currency Symbols
    DOLLAR_SIGN,
    POUND_SIGN,
    RUPEE_SIGN,
    YEN_SIGN,
    WON_SIGN,

    // Other
    DIGITS,

    // Combined Separators with Digits (for lead code points)
    DIGITS_OR_ALL_SEPARATORS,
    DIGITS_OR_STRICT_ALL_SEPARATORS,

    // The number of elements in the enum. Keep this entry last.
    UNISETS_KEY_COUNT
};
| 77 | |
| 78 | /** |
| 79 | * Gets the static-allocated UnicodeSet according to the provided key. The |
| 80 | * pointer will be deleted during u_cleanup(); the caller should NOT delete it. |
| 81 | * |
| 82 | * Exported as U_COMMON_API for ucurr.cpp |
| 83 | * |
| 84 | * This method is always safe and OK to chain: in the case of a memory or other |
| 85 | * error, it returns an empty set from static memory. |
| 86 | * |
| 87 | * Example: |
| 88 | * |
| 89 | * UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...); |
| 90 | * |
| 91 | * @param key The desired UnicodeSet according to the enum in this file. |
| 92 | * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but |
| 93 | * may be empty if an error occurred during data loading. |
| 94 | */ |
| 95 | U_COMMON_API const UnicodeSet* get(Key key); |
| 96 | |
| 97 | /** |
| 98 | * Checks if the UnicodeSet given by key1 contains the given string. |
| 99 | * |
| 100 | * Exported as U_COMMON_API for numparse_decimal.cpp |
| 101 | * |
| 102 | * @param str The string to check. |
| 103 | * @param key1 The set to check. |
| 104 | * @return key1 if the set contains str, or NONE if not. |
| 105 | */ |
| 106 | U_COMMON_API Key chooseFrom(UnicodeString str, Key key1); |
| 107 | |
| 108 | /** |
| 109 | * Checks if the UnicodeSet given by either key1 or key2 contains the string. |
| 110 | * |
| 111 | * Exported as U_COMMON_API for numparse_decimal.cpp |
| 112 | * |
| 113 | * @param str The string to check. |
| 114 | * @param key1 The first set to check. |
| 115 | * @param key2 The second set to check. |
| 116 | * @return key1 if that set contains str; key2 if that set contains str; or |
| 117 | * NONE if neither set contains str. |
| 118 | */ |
| 119 | U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2); |
| 120 | |
| 121 | // TODO: Load these from data: ICU-20108 |
| 122 | // Unused in C++: |
| 123 | // Key chooseCurrency(UnicodeString str); |
| 124 | // Used instead: |
| 125 | static const struct { |
| 126 | Key key; |
| 127 | UChar32 exemplar; |
| 128 | } kCurrencyEntries[] = { |
| 129 | {DOLLAR_SIGN, u'$'}, |
| 130 | {POUND_SIGN, u'£'}, |
| 131 | {RUPEE_SIGN, u'₹'}, |
| 132 | {YEN_SIGN, u'¥'}, |
| 133 | {WON_SIGN, u'₩'}, |
| 134 | }; |
| 135 | |
| 136 | } // namespace unisets |
| 137 | U_NAMESPACE_END |
| 138 | |
| 139 | #endif //__STATIC_UNICODE_SETS_H__ |
| 140 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
| 141 | |