| 1 | // © 2018 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 |  | 
|---|
| 4 | // This file contains utilities to deal with static-allocated UnicodeSets. | 
|---|
| 5 | // | 
|---|
| 6 | // Common use case: you write a "private static final" UnicodeSet in Java, and | 
|---|
| 7 | // want something similarly easy in C++.  Originally written for number | 
|---|
| 8 | // parsing, but this header can be used for other applications. | 
|---|
| 9 | // | 
|---|
| 10 | // Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)` | 
|---|
| 11 | // | 
|---|
| 12 | // This file is in common instead of i18n because it is needed by ucurr.cpp. | 
|---|
| 13 | // | 
|---|
| 14 | // Author: sffc | 
|---|
| 15 |  | 
|---|
| 16 | #include "unicode/utypes.h" | 
|---|
| 17 |  | 
|---|
| 18 | #if !UCONFIG_NO_FORMATTING | 
|---|
| 19 | #ifndef __STATIC_UNICODE_SETS_H__ | 
|---|
| 20 | #define __STATIC_UNICODE_SETS_H__ | 
|---|
| 21 |  | 
|---|
| 22 | #include "unicode/uniset.h" | 
|---|
| 23 | #include "unicode/unistr.h" | 
|---|
| 24 |  | 
|---|
| 25 | U_NAMESPACE_BEGIN | 
|---|
| 26 | namespace unisets { | 
|---|
| 27 |  | 
|---|
/**
 * Identifies one of the statically allocated UnicodeSets declared in this
 * header. A Key is what callers pass to get() and chooseFrom().
 *
 * Apart from NONE, the enumerators are numbered consecutively starting at
 * EMPTY = 0, so UNISETS_KEY_COUNT equals the number of keys from EMPTY
 * onward. New keys therefore have to be inserted before UNISETS_KEY_COUNT.
 */
enum Key {
    // NONE is used to indicate null in chooseFrom().
    // EMPTY is used to get an empty UnicodeSet.
    NONE = -1,
    EMPTY = 0,

    // Ignorables
    DEFAULT_IGNORABLES,
    STRICT_IGNORABLES,

    // Separators
    // Notes:
    // - COMMA is a superset of STRICT_COMMA
    // - PERIOD is a superset of STRICT_PERIOD
    // - ALL_SEPARATORS is the union of COMMA, PERIOD, and
    //   OTHER_GROUPING_SEPARATORS
    // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD,
    //   and OTHER_GROUPING_SEPARATORS
    COMMA,
    PERIOD,
    STRICT_COMMA,
    STRICT_PERIOD,
    APOSTROPHE_SIGN,
    OTHER_GROUPING_SEPARATORS,
    ALL_SEPARATORS,
    STRICT_ALL_SEPARATORS,

    // Symbols
    MINUS_SIGN,
    PLUS_SIGN,
    PERCENT_SIGN,
    PERMILLE_SIGN,
    INFINITY_SIGN,

    // Currency Symbols
    DOLLAR_SIGN,
    POUND_SIGN,
    RUPEE_SIGN,
    YEN_SIGN,
    WON_SIGN,

    // Other
    DIGITS,

    // Combined Separators with Digits (for lead code points)
    DIGITS_OR_ALL_SEPARATORS,
    DIGITS_OR_STRICT_ALL_SEPARATORS,

    // The number of elements in the enum (NONE, being -1, is not counted).
    // Must remain the last enumerator.
    UNISETS_KEY_COUNT
};
|---|
| 77 |  | 
|---|
| 78 | /** | 
|---|
| 79 | * Gets the static-allocated UnicodeSet according to the provided key. The | 
|---|
| 80 | * pointer will be deleted during u_cleanup(); the caller should NOT delete it. | 
|---|
| 81 | * | 
|---|
| 82 | * Exported as U_COMMON_API for ucurr.cpp | 
|---|
| 83 | * | 
|---|
| 84 | * This method is always safe and OK to chain: in the case of a memory or other | 
|---|
| 85 | * error, it returns an empty set from static memory. | 
|---|
| 86 | * | 
|---|
| 87 | * Example: | 
|---|
| 88 | * | 
|---|
| 89 | *     UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...); | 
|---|
| 90 | * | 
|---|
| 91 | * @param key The desired UnicodeSet according to the enum in this file. | 
|---|
| 92 | * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but | 
|---|
| 93 | *         may be empty if an error occurred during data loading. | 
|---|
| 94 | */ | 
|---|
| 95 | U_COMMON_API const UnicodeSet* get(Key key); | 
|---|
| 96 |  | 
|---|
| 97 | /** | 
|---|
| 98 | * Checks if the UnicodeSet given by key1 contains the given string. | 
|---|
| 99 | * | 
|---|
| 100 | * Exported as U_COMMON_API for numparse_decimal.cpp | 
|---|
| 101 | * | 
|---|
| 102 | * @param str The string to check. | 
|---|
| 103 | * @param key1 The set to check. | 
|---|
| 104 | * @return key1 if the set contains str, or NONE if not. | 
|---|
| 105 | */ | 
|---|
| 106 | U_COMMON_API Key chooseFrom(UnicodeString str, Key key1); | 
|---|
| 107 |  | 
|---|
| 108 | /** | 
|---|
| 109 | * Checks if the UnicodeSet given by either key1 or key2 contains the string. | 
|---|
| 110 | * | 
|---|
| 111 | * Exported as U_COMMON_API for numparse_decimal.cpp | 
|---|
| 112 | * | 
|---|
| 113 | * @param str The string to check. | 
|---|
| 114 | * @param key1 The first set to check. | 
|---|
| 115 | * @param key2 The second set to check. | 
|---|
| 116 | * @return key1 if that set contains str; key2 if that set contains str; or | 
|---|
| 117 | *         NONE if neither set contains str. | 
|---|
| 118 | */ | 
|---|
| 119 | U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2); | 
|---|
| 120 |  | 
|---|
| 121 | // TODO: Load these from data: ICU-20108 | 
|---|
| 122 | // Unused in C++: | 
|---|
| 123 | // Key chooseCurrency(UnicodeString str); | 
|---|
| 124 | // Used instead: | 
|---|
| 125 | static const struct { | 
|---|
| 126 | Key key; | 
|---|
| 127 | UChar32 exemplar; | 
|---|
| 128 | } kCurrencyEntries[] = { | 
|---|
| 129 | {DOLLAR_SIGN, u'$'}, | 
|---|
| 130 | {POUND_SIGN, u'£'}, | 
|---|
| 131 | {RUPEE_SIGN, u'₹'}, | 
|---|
| 132 | {YEN_SIGN, u'¥'}, | 
|---|
| 133 | {WON_SIGN, u'₩'}, | 
|---|
| 134 | }; | 
|---|
| 135 |  | 
|---|
| 136 | } // namespace unisets | 
|---|
| 137 | U_NAMESPACE_END | 
|---|
| 138 |  | 
|---|
| 139 | #endif //__STATIC_UNICODE_SETS_H__ | 
|---|
| 140 | #endif /* #if !UCONFIG_NO_FORMATTING */ | 
|---|
| 141 |  | 
|---|