| 1 | // © 2017 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | |
| 4 | #include "unicode/utypes.h" |
| 5 | |
| 6 | #if !UCONFIG_NO_FORMATTING |
| 7 | |
| 8 | #include "unicode/simpleformatter.h" |
| 9 | #include "unicode/ures.h" |
| 10 | #include "ureslocs.h" |
| 11 | #include "charstr.h" |
| 12 | #include "uresimp.h" |
| 13 | #include "number_longnames.h" |
| 14 | #include "number_microprops.h" |
| 15 | #include <algorithm> |
| 16 | #include "cstring.h" |
| 17 | #include "util.h" |
| 18 | |
| 19 | using namespace icu; |
| 20 | using namespace icu::number; |
| 21 | using namespace icu::number::impl; |
| 22 | |
| 23 | namespace { |
| 24 | |
| 25 | constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT; |
| 26 | constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1; |
| 27 | constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 2; |
| 28 | |
| 29 | static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { |
| 30 | // pluralKeyword can also be "dnam" or "per" |
| 31 | if (uprv_strcmp(pluralKeyword, "dnam" ) == 0) { |
| 32 | return DNAM_INDEX; |
| 33 | } else if (uprv_strcmp(pluralKeyword, "per" ) == 0) { |
| 34 | return PER_INDEX; |
| 35 | } else { |
| 36 | StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status); |
| 37 | return plural; |
| 38 | } |
| 39 | } |
| 40 | |
| 41 | static UnicodeString getWithPlural( |
| 42 | const UnicodeString* strings, |
| 43 | StandardPlural::Form plural, |
| 44 | UErrorCode& status) { |
| 45 | UnicodeString result = strings[plural]; |
| 46 | if (result.isBogus()) { |
| 47 | result = strings[StandardPlural::Form::OTHER]; |
| 48 | } |
| 49 | if (result.isBogus()) { |
| 50 | // There should always be data in the "other" plural variant. |
| 51 | status = U_INTERNAL_PROGRAM_ERROR; |
| 52 | } |
| 53 | return result; |
| 54 | } |
| 55 | |
| 56 | |
| 57 | ////////////////////////// |
| 58 | /// BEGIN DATA LOADING /// |
| 59 | ////////////////////////// |
| 60 | |
| 61 | class PluralTableSink : public ResourceSink { |
| 62 | public: |
| 63 | explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) { |
| 64 | // Initialize the array to bogus strings. |
| 65 | for (int32_t i = 0; i < ARRAY_LENGTH; i++) { |
| 66 | outArray[i].setToBogus(); |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE { |
| 71 | ResourceTable pluralsTable = value.getTable(status); |
| 72 | if (U_FAILURE(status)) { return; } |
| 73 | for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { |
| 74 | int32_t index = getIndex(key, status); |
| 75 | if (U_FAILURE(status)) { return; } |
| 76 | if (!outArray[index].isBogus()) { |
| 77 | continue; |
| 78 | } |
| 79 | outArray[index] = value.getUnicodeString(status); |
| 80 | if (U_FAILURE(status)) { return; } |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | private: |
| 85 | UnicodeString *outArray; |
| 86 | }; |
| 87 | |
// NOTE: outArray MUST have room for ARRAY_LENGTH entries (one per StandardPlural value plus the "dnam" and "per" slots). No bounds checking is performed.
| 89 | |
| 90 | void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumberUnitWidth &width, |
| 91 | UnicodeString *outArray, UErrorCode &status) { |
| 92 | PluralTableSink sink(outArray); |
| 93 | LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); |
| 94 | if (U_FAILURE(status)) { return; } |
| 95 | |
| 96 | // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... |
| 97 | // TODO(ICU-20400): Get duration-*-person data properly with aliases. |
| 98 | StringPiece subtypeForResource; |
| 99 | int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unit.getSubtype())); |
| 100 | if (subtypeLen > 7 && uprv_strcmp(unit.getSubtype() + subtypeLen - 7, "-person" ) == 0) { |
| 101 | subtypeForResource = {unit.getSubtype(), subtypeLen - 7}; |
| 102 | } else { |
| 103 | subtypeForResource = unit.getSubtype(); |
| 104 | } |
| 105 | |
| 106 | CharString key; |
| 107 | key.append("units" , status); |
| 108 | if (width == UNUM_UNIT_WIDTH_NARROW) { |
| 109 | key.append("Narrow" , status); |
| 110 | } else if (width == UNUM_UNIT_WIDTH_SHORT) { |
| 111 | key.append("Short" , status); |
| 112 | } |
| 113 | key.append("/" , status); |
| 114 | key.append(unit.getType(), status); |
| 115 | key.append("/" , status); |
| 116 | key.append(subtypeForResource, status); |
| 117 | |
| 118 | UErrorCode localStatus = U_ZERO_ERROR; |
| 119 | ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); |
| 120 | if (width == UNUM_UNIT_WIDTH_SHORT) { |
| 121 | if (U_FAILURE(localStatus)) { |
| 122 | status = localStatus; |
| 123 | } |
| 124 | return; |
| 125 | } |
| 126 | |
| 127 | // TODO(ICU-13353): The fallback to short does not work in ICU4C. |
| 128 | // Manually fall back to short (this is done automatically in Java). |
| 129 | key.clear(); |
| 130 | key.append("unitsShort/" , status); |
| 131 | key.append(unit.getType(), status); |
| 132 | key.append("/" , status); |
| 133 | key.append(subtypeForResource, status); |
| 134 | ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status); |
| 135 | } |
| 136 | |
| 137 | void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, UnicodeString *outArray, |
| 138 | UErrorCode &status) { |
| 139 | // In ICU4J, this method gets a CurrencyData from CurrencyData.provider. |
| 140 | // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C? |
| 141 | PluralTableSink sink(outArray); |
| 142 | LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status)); |
| 143 | if (U_FAILURE(status)) { return; } |
| 144 | ures_getAllItemsWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns" , sink, status); |
| 145 | if (U_FAILURE(status)) { return; } |
| 146 | for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { |
| 147 | UnicodeString &pattern = outArray[i]; |
| 148 | if (pattern.isBogus()) { |
| 149 | continue; |
| 150 | } |
| 151 | int32_t longNameLen = 0; |
| 152 | const char16_t *longName = ucurr_getPluralName( |
| 153 | currency.getISOCurrency(), |
| 154 | locale.getName(), |
| 155 | nullptr /* isChoiceFormat */, |
| 156 | StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)), |
| 157 | &longNameLen, |
| 158 | &status); |
| 159 | // Example pattern from data: "{0} {1}" |
| 160 | // Example output after find-and-replace: "{0} US dollars" |
| 161 | pattern.findAndReplace(UnicodeString(u"{1}" ), UnicodeString(longName, longNameLen)); |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | UnicodeString getPerUnitFormat(const Locale& locale, const UNumberUnitWidth &width, UErrorCode& status) { |
| 166 | LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); |
| 167 | if (U_FAILURE(status)) { return {}; } |
| 168 | CharString key; |
| 169 | key.append("units" , status); |
| 170 | if (width == UNUM_UNIT_WIDTH_NARROW) { |
| 171 | key.append("Narrow" , status); |
| 172 | } else if (width == UNUM_UNIT_WIDTH_SHORT) { |
| 173 | key.append("Short" , status); |
| 174 | } |
| 175 | key.append("/compound/per" , status); |
| 176 | int32_t len = 0; |
| 177 | const UChar* ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status); |
| 178 | return UnicodeString(ptr, len); |
| 179 | } |
| 180 | |
| 181 | //////////////////////// |
| 182 | /// END DATA LOADING /// |
| 183 | //////////////////////// |
| 184 | |
| 185 | } // namespace |
| 186 | |
| 187 | LongNameHandler* |
| 188 | LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, const MeasureUnit &perUnit, |
| 189 | const UNumberUnitWidth &width, const PluralRules *rules, |
| 190 | const MicroPropsGenerator *parent, UErrorCode &status) { |
| 191 | MeasureUnit unit = unitRef; |
| 192 | if (uprv_strcmp(perUnit.getType(), "none" ) != 0) { |
| 193 | // Compound unit: first try to simplify (e.g., meters per second is its own unit). |
| 194 | bool isResolved = false; |
| 195 | MeasureUnit resolved = MeasureUnit::resolveUnitPerUnit(unit, perUnit, &isResolved); |
| 196 | if (isResolved) { |
| 197 | unit = resolved; |
| 198 | } else { |
| 199 | // No simplified form is available. |
| 200 | return forCompoundUnit(loc, unit, perUnit, width, rules, parent, status); |
| 201 | } |
| 202 | } |
| 203 | |
| 204 | auto* result = new LongNameHandler(rules, parent); |
| 205 | if (result == nullptr) { |
| 206 | status = U_MEMORY_ALLOCATION_ERROR; |
| 207 | return nullptr; |
| 208 | } |
| 209 | UnicodeString simpleFormats[ARRAY_LENGTH]; |
| 210 | getMeasureData(loc, unit, width, simpleFormats, status); |
| 211 | if (U_FAILURE(status)) { return result; } |
| 212 | result->simpleFormatsToModifiers(simpleFormats, UNUM_MEASURE_UNIT_FIELD, status); |
| 213 | return result; |
| 214 | } |
| 215 | |
| 216 | LongNameHandler* |
| 217 | LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, |
| 218 | const UNumberUnitWidth &width, const PluralRules *rules, |
| 219 | const MicroPropsGenerator *parent, UErrorCode &status) { |
| 220 | auto* result = new LongNameHandler(rules, parent); |
| 221 | if (result == nullptr) { |
| 222 | status = U_MEMORY_ALLOCATION_ERROR; |
| 223 | return nullptr; |
| 224 | } |
| 225 | UnicodeString primaryData[ARRAY_LENGTH]; |
| 226 | getMeasureData(loc, unit, width, primaryData, status); |
| 227 | if (U_FAILURE(status)) { return result; } |
| 228 | UnicodeString secondaryData[ARRAY_LENGTH]; |
| 229 | getMeasureData(loc, perUnit, width, secondaryData, status); |
| 230 | if (U_FAILURE(status)) { return result; } |
| 231 | |
| 232 | UnicodeString perUnitFormat; |
| 233 | if (!secondaryData[PER_INDEX].isBogus()) { |
| 234 | perUnitFormat = secondaryData[PER_INDEX]; |
| 235 | } else { |
| 236 | UnicodeString rawPerUnitFormat = getPerUnitFormat(loc, width, status); |
| 237 | if (U_FAILURE(status)) { return result; } |
| 238 | // rawPerUnitFormat is something like "{0}/{1}"; we need to substitute in the secondary unit. |
| 239 | SimpleFormatter compiled(rawPerUnitFormat, 2, 2, status); |
| 240 | if (U_FAILURE(status)) { return result; } |
| 241 | UnicodeString secondaryFormat = getWithPlural(secondaryData, StandardPlural::Form::ONE, status); |
| 242 | if (U_FAILURE(status)) { return result; } |
| 243 | SimpleFormatter secondaryCompiled(secondaryFormat, 1, 1, status); |
| 244 | if (U_FAILURE(status)) { return result; } |
| 245 | UnicodeString secondaryString = secondaryCompiled.getTextWithNoArguments().trim(); |
| 246 | // TODO: Why does UnicodeString need to be explicit in the following line? |
| 247 | compiled.format(UnicodeString(u"{0}" ), secondaryString, perUnitFormat, status); |
| 248 | if (U_FAILURE(status)) { return result; } |
| 249 | } |
| 250 | result->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, UNUM_MEASURE_UNIT_FIELD, status); |
| 251 | return result; |
| 252 | } |
| 253 | |
| 254 | UnicodeString LongNameHandler::getUnitDisplayName( |
| 255 | const Locale& loc, |
| 256 | const MeasureUnit& unit, |
| 257 | UNumberUnitWidth width, |
| 258 | UErrorCode& status) { |
| 259 | if (U_FAILURE(status)) { |
| 260 | return ICU_Utility::makeBogusString(); |
| 261 | } |
| 262 | UnicodeString simpleFormats[ARRAY_LENGTH]; |
| 263 | getMeasureData(loc, unit, width, simpleFormats, status); |
| 264 | return simpleFormats[DNAM_INDEX]; |
| 265 | } |
| 266 | |
| 267 | UnicodeString LongNameHandler::getUnitPattern( |
| 268 | const Locale& loc, |
| 269 | const MeasureUnit& unit, |
| 270 | UNumberUnitWidth width, |
| 271 | StandardPlural::Form pluralForm, |
| 272 | UErrorCode& status) { |
| 273 | if (U_FAILURE(status)) { |
| 274 | return ICU_Utility::makeBogusString(); |
| 275 | } |
| 276 | UnicodeString simpleFormats[ARRAY_LENGTH]; |
| 277 | getMeasureData(loc, unit, width, simpleFormats, status); |
| 278 | // The above already handles fallback from other widths to short |
| 279 | if (U_FAILURE(status)) { |
| 280 | return ICU_Utility::makeBogusString(); |
| 281 | } |
| 282 | // Now handle fallback from other plural forms to OTHER |
| 283 | return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]: |
| 284 | simpleFormats[StandardPlural::Form::OTHER]; |
| 285 | } |
| 286 | |
| 287 | LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy, |
| 288 | const PluralRules *rules, |
| 289 | const MicroPropsGenerator *parent, |
| 290 | UErrorCode &status) { |
| 291 | auto* result = new LongNameHandler(rules, parent); |
| 292 | if (result == nullptr) { |
| 293 | status = U_MEMORY_ALLOCATION_ERROR; |
| 294 | return nullptr; |
| 295 | } |
| 296 | UnicodeString simpleFormats[ARRAY_LENGTH]; |
| 297 | getCurrencyLongNameData(loc, currency, simpleFormats, status); |
| 298 | if (U_FAILURE(status)) { return nullptr; } |
| 299 | result->simpleFormatsToModifiers(simpleFormats, UNUM_CURRENCY_FIELD, status); |
| 300 | return result; |
| 301 | } |
| 302 | |
| 303 | void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, |
| 304 | UErrorCode &status) { |
| 305 | for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { |
| 306 | StandardPlural::Form plural = static_cast<StandardPlural::Form>(i); |
| 307 | UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status); |
| 308 | if (U_FAILURE(status)) { return; } |
| 309 | SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); |
| 310 | if (U_FAILURE(status)) { return; } |
| 311 | fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural}); |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, |
| 316 | Field field, UErrorCode &status) { |
| 317 | SimpleFormatter trailCompiled(trailFormat, 1, 1, status); |
| 318 | if (U_FAILURE(status)) { return; } |
| 319 | for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { |
| 320 | StandardPlural::Form plural = static_cast<StandardPlural::Form>(i); |
| 321 | UnicodeString leadFormat = getWithPlural(leadFormats, plural, status); |
| 322 | if (U_FAILURE(status)) { return; } |
| 323 | UnicodeString compoundFormat; |
| 324 | trailCompiled.format(leadFormat, compoundFormat, status); |
| 325 | if (U_FAILURE(status)) { return; } |
| 326 | SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status); |
| 327 | if (U_FAILURE(status)) { return; } |
| 328 | fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural}); |
| 329 | } |
| 330 | } |
| 331 | |
| 332 | void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, |
| 333 | UErrorCode &status) const { |
| 334 | parent->processQuantity(quantity, micros, status); |
| 335 | StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status); |
| 336 | micros.modOuter = &fModifiers[pluralForm]; |
| 337 | } |
| 338 | |
| 339 | const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const { |
| 340 | return &fModifiers[plural]; |
| 341 | } |
| 342 | |
| 343 | #endif /* #if !UCONFIG_NO_FORMATTING */ |
| 344 | |