1// © 2020 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4// Extra functions for MeasureUnit not needed for all clients.
5// Separate .o file so that it can be removed for modularity.
6
7#include "unicode/utypes.h"
8
9#if !UCONFIG_NO_FORMATTING
10
11// Allow implicit conversion from char16_t* to UnicodeString for this file:
12// Helpful in toString methods and elsewhere.
13#define UNISTR_FROM_STRING_EXPLICIT
14
15#include <cstdlib>
16#include "cstring.h"
17#include "measunit_impl.h"
18#include "uarrsort.h"
19#include "uassert.h"
20#include "ucln_in.h"
21#include "umutex.h"
22#include "unicode/errorcode.h"
23#include "unicode/localpointer.h"
24#include "unicode/measunit.h"
25#include "unicode/ucharstrie.h"
26#include "unicode/ucharstriebuilder.h"
27
28#include "cstr.h"
29
30U_NAMESPACE_BEGIN
31
32
33namespace {
34
// Error code reported by the parser and serializer in this file whenever a
// unit identifier is syntactically invalid.
// TODO: Propose a new error code for this?
constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;

// The stem trie built by initUnitExtras() maps each stem to an int32_t value.
// The offsets below partition that value space; Token::getType() classifies a
// trie match purely by comparing it against these offsets, so the offsets must
// stay in strictly increasing order:
//   < kCompoundPartOffset          SI prefix
//   < kInitialCompoundPartOffset   compound part ("-per-", "-", "-and-")
//   < kPowerPartOffset             initial compound part ("per-")
//   < kSimpleUnitOffset            power part ("square-", "p4-", ...)
//   >= kSimpleUnitOffset           simple unit

// Trie value offset for SI Prefixes. This is big enough to ensure we only
// insert positive integers into the trie. The value stored in the trie is the
// UMeasureSIPrefix enum value plus this offset.
constexpr int32_t kSIPrefixOffset = 64;

// Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
constexpr int32_t kCompoundPartOffset = 128;

// Trie values for the separators that may appear between two single units.
enum CompoundPart {
    // Represents "-per-"
    COMPOUND_PART_PER = kCompoundPartOffset,
    // Represents "-"
    COMPOUND_PART_TIMES,
    // Represents "-and-"
    COMPOUND_PART_AND,
};

// Trie value offset for "per-".
constexpr int32_t kInitialCompoundPartOffset = 192;

// Trie values for compound parts that may appear at the very start of an
// identifier.
enum InitialCompoundPart {
    // Represents "per-", the only compound part that can appear at the start of
    // an identifier.
    INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
};

// Trie value offset for powers like "square-", "cubic-", "p2-" etc.
constexpr int32_t kPowerPartOffset = 256;

// Trie values for power prefixes. Each enumerator equals kPowerPartOffset plus
// the power it denotes, which is what Token::getPower() relies on when it
// subtracts kPowerPartOffset again.
enum PowerPart {
    POWER_PART_P2 = kPowerPartOffset + 2,
    POWER_PART_P3,
    POWER_PART_P4,
    POWER_PART_P5,
    POWER_PART_P6,
    POWER_PART_P7,
    POWER_PART_P8,
    POWER_PART_P9,
    POWER_PART_P10,
    POWER_PART_P11,
    POWER_PART_P12,
    POWER_PART_P13,
    POWER_PART_P14,
    POWER_PART_P15,
};

// Trie value offset for simple units, e.g. "gram", "nautical-mile",
// "fluid-ounce-imperial". The value stored in the trie is the unit's index in
// gSimpleUnits plus this offset.
constexpr int32_t kSimpleUnitOffset = 512;
86
// Table pairing each SI prefix spelling with its UMeasureSIPrefix value.
// It is used in both directions: initUnitExtras() inserts every string into
// the stem trie, and serializeSingle() scans the table to find the string for
// a given prefix value. UMEASURE_SI_PREFIX_ONE has no entry because it is
// serialized as the absence of a prefix.
const struct SIPrefixStrings {
    // Prefix spelling as it appears in a unit identifier (invariant chars).
    const char* const string;
    // The SI prefix this spelling stands for.
    UMeasureSIPrefix value;
} gSIPrefixStrings[] = {
    { "yotta", UMEASURE_SI_PREFIX_YOTTA },
    { "zetta", UMEASURE_SI_PREFIX_ZETTA },
    { "exa", UMEASURE_SI_PREFIX_EXA },
    { "peta", UMEASURE_SI_PREFIX_PETA },
    { "tera", UMEASURE_SI_PREFIX_TERA },
    { "giga", UMEASURE_SI_PREFIX_GIGA },
    { "mega", UMEASURE_SI_PREFIX_MEGA },
    { "kilo", UMEASURE_SI_PREFIX_KILO },
    { "hecto", UMEASURE_SI_PREFIX_HECTO },
    { "deka", UMEASURE_SI_PREFIX_DEKA },
    { "deci", UMEASURE_SI_PREFIX_DECI },
    { "centi", UMEASURE_SI_PREFIX_CENTI },
    { "milli", UMEASURE_SI_PREFIX_MILLI },
    { "micro", UMEASURE_SI_PREFIX_MICRO },
    { "nano", UMEASURE_SI_PREFIX_NANO },
    { "pico", UMEASURE_SI_PREFIX_PICO },
    { "femto", UMEASURE_SI_PREFIX_FEMTO },
    { "atto", UMEASURE_SI_PREFIX_ATTO },
    { "zepto", UMEASURE_SI_PREFIX_ZEPTO },
    { "yocto", UMEASURE_SI_PREFIX_YOCTO },
};
112
// The sanctioned simple unit stems.
//
// The position of a stem in this array is its "simple unit index":
// initUnitExtras() stores (kSimpleUnitOffset + position) in the stem trie,
// Token::getSimpleUnitIndex() recovers the position, and serializeSingle()
// uses SingleUnitImpl::index to look the stem up again. Reordering entries
// therefore changes the index assigned to every unit that follows.
//
// TODO(ICU-21059): Get this list from data
const char16_t* const gSimpleUnits[] = {
    u"candela",
    u"carat",
    u"gram",
    u"ounce",
    u"ounce-troy",
    u"pound",
    u"kilogram",
    u"stone",
    u"ton",
    u"metric-ton",
    u"earth-mass",
    u"solar-mass",
    u"point",
    u"inch",
    u"foot",
    u"yard",
    u"meter",
    u"fathom",
    u"furlong",
    u"mile",
    u"nautical-mile",
    u"mile-scandinavian",
    u"100-kilometer",
    u"earth-radius",
    u"solar-radius",
    u"astronomical-unit",
    u"light-year",
    u"parsec",
    u"second",
    u"minute",
    u"hour",
    u"day",
    u"day-person",
    u"week",
    u"week-person",
    u"month",
    u"month-person",
    u"year",
    u"year-person",
    u"decade",
    u"century",
    u"ampere",
    u"fahrenheit",
    u"kelvin",
    u"celsius",
    u"arc-second",
    u"arc-minute",
    u"degree",
    u"radian",
    u"revolution",
    u"item",
    u"mole",
    u"permillion",
    u"permyriad",
    u"permille",
    u"percent",
    u"karat",
    u"portion",
    u"bit",
    u"byte",
    u"dot",
    u"pixel",
    u"em",
    u"hertz",
    u"newton",
    u"pound-force",
    u"pascal",
    u"bar",
    u"atmosphere",
    u"ofhg",
    u"electronvolt",
    u"dalton",
    u"joule",
    u"calorie",
    u"british-thermal-unit",
    u"foodcalorie",
    u"therm-us",
    u"watt",
    u"horsepower",
    u"solar-luminosity",
    u"volt",
    u"ohm",
    u"dunam",
    u"acre",
    u"hectare",
    u"teaspoon",
    u"tablespoon",
    u"fluid-ounce-imperial",
    u"fluid-ounce",
    u"cup",
    u"cup-metric",
    u"pint",
    u"pint-metric",
    u"quart",
    u"liter",
    u"gallon",
    u"gallon-imperial",
    u"bushel",
    u"barrel",
    u"knot",
    u"g-force",
    u"lux",
};
218
// Guards the one-time construction of the stem trie; Parser::from() passes it
// to umtx_initOnce() together with initUnitExtras().
icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER;

// Serialized UCharsTrie mapping stems to the trie values defined above.
// Allocated with uprv_malloc() by initUnitExtras() and released by
// cleanupUnitExtras(); nullptr whenever the trie has not been built.
char16_t* kSerializedUnitExtrasStemTrie = nullptr;
222
223UBool U_CALLCONV cleanupUnitExtras() {
224 uprv_free(kSerializedUnitExtrasStemTrie);
225 kSerializedUnitExtrasStemTrie = nullptr;
226 gUnitExtrasInitOnce.reset();
227 return TRUE;
228}
229
230void U_CALLCONV initUnitExtras(UErrorCode& status) {
231 ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
232
233 UCharsTrieBuilder b(status);
234 if (U_FAILURE(status)) { return; }
235
236 // Add SI prefixes
237 for (const auto& siPrefixInfo : gSIPrefixStrings) {
238 UnicodeString uSIPrefix(siPrefixInfo.string, -1, US_INV);
239 b.add(uSIPrefix, siPrefixInfo.value + kSIPrefixOffset, status);
240 }
241 if (U_FAILURE(status)) { return; }
242
243 // Add syntax parts (compound, power prefixes)
244 b.add(u"-per-", COMPOUND_PART_PER, status);
245 b.add(u"-", COMPOUND_PART_TIMES, status);
246 b.add(u"-and-", COMPOUND_PART_AND, status);
247 b.add(u"per-", INITIAL_COMPOUND_PART_PER, status);
248 b.add(u"square-", POWER_PART_P2, status);
249 b.add(u"cubic-", POWER_PART_P3, status);
250 b.add(u"p2-", POWER_PART_P2, status);
251 b.add(u"p3-", POWER_PART_P3, status);
252 b.add(u"p4-", POWER_PART_P4, status);
253 b.add(u"p5-", POWER_PART_P5, status);
254 b.add(u"p6-", POWER_PART_P6, status);
255 b.add(u"p7-", POWER_PART_P7, status);
256 b.add(u"p8-", POWER_PART_P8, status);
257 b.add(u"p9-", POWER_PART_P9, status);
258 b.add(u"p10-", POWER_PART_P10, status);
259 b.add(u"p11-", POWER_PART_P11, status);
260 b.add(u"p12-", POWER_PART_P12, status);
261 b.add(u"p13-", POWER_PART_P13, status);
262 b.add(u"p14-", POWER_PART_P14, status);
263 b.add(u"p15-", POWER_PART_P15, status);
264 if (U_FAILURE(status)) { return; }
265
266 // Add sanctioned simple units by offset
267 int32_t simpleUnitOffset = kSimpleUnitOffset;
268 for (auto simpleUnit : gSimpleUnits) {
269 b.add(simpleUnit, simpleUnitOffset++, status);
270 }
271
272 // Build the CharsTrie
273 // TODO: Use SLOW or FAST here?
274 UnicodeString result;
275 b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status);
276 if (U_FAILURE(status)) { return; }
277
278 // Copy the result into the global constant pointer
279 size_t numBytes = result.length() * sizeof(char16_t);
280 kSerializedUnitExtrasStemTrie = static_cast<char16_t*>(uprv_malloc(numBytes));
281 uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes);
282}
283
284class Token {
285public:
286 Token(int32_t match) : fMatch(match) {}
287
288 enum Type {
289 TYPE_UNDEFINED,
290 TYPE_SI_PREFIX,
291 // Token type for "-per-", "-", and "-and-".
292 TYPE_COMPOUND_PART,
293 // Token type for "per-".
294 TYPE_INITIAL_COMPOUND_PART,
295 TYPE_POWER_PART,
296 TYPE_SIMPLE_UNIT,
297 };
298
299 // Calling getType() is invalid, resulting in an assertion failure, if Token
300 // value isn't positive.
301 Type getType() const {
302 U_ASSERT(fMatch > 0);
303 if (fMatch < kCompoundPartOffset) {
304 return TYPE_SI_PREFIX;
305 }
306 if (fMatch < kInitialCompoundPartOffset) {
307 return TYPE_COMPOUND_PART;
308 }
309 if (fMatch < kPowerPartOffset) {
310 return TYPE_INITIAL_COMPOUND_PART;
311 }
312 if (fMatch < kSimpleUnitOffset) {
313 return TYPE_POWER_PART;
314 }
315 return TYPE_SIMPLE_UNIT;
316 }
317
318 UMeasureSIPrefix getSIPrefix() const {
319 U_ASSERT(getType() == TYPE_SI_PREFIX);
320 return static_cast<UMeasureSIPrefix>(fMatch - kSIPrefixOffset);
321 }
322
323 // Valid only for tokens with type TYPE_COMPOUND_PART.
324 int32_t getMatch() const {
325 U_ASSERT(getType() == TYPE_COMPOUND_PART);
326 return fMatch;
327 }
328
329 int32_t getInitialCompoundPart() const {
330 // Even if there is only one InitialCompoundPart value, we have this
331 // function for the simplicity of code consistency.
332 U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
333 // Defensive: if this assert fails, code using this function also needs
334 // to change.
335 U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
336 return fMatch;
337 }
338
339 int8_t getPower() const {
340 U_ASSERT(getType() == TYPE_POWER_PART);
341 return static_cast<int8_t>(fMatch - kPowerPartOffset);
342 }
343
344 int32_t getSimpleUnitIndex() const {
345 U_ASSERT(getType() == TYPE_SIMPLE_UNIT);
346 return fMatch - kSimpleUnitOffset;
347 }
348
349private:
350 int32_t fMatch;
351};
352
/**
 * Parses a unit identifier such as "square-kilometer-per-hour" into a
 * MeasureUnitImpl.
 *
 * The parser tokenizes the identifier by repeatedly finding the longest stem
 * in the shared stem trie (see initUnitExtras()) and assembles the tokens into
 * SingleUnitImpl instances. A Parser is single-use: it keeps its position in
 * the source string and the "after per" state as members.
 */
class Parser {
public:
    /**
     * Factory function for parsing the given identifier.
     *
     * Ensures the shared stem trie has been built before constructing the
     * parser. If status is, or becomes, a failure, an empty parser is
     * returned.
     *
     * @param source The identifier to parse. This function does not make a copy
     * of source: the underlying string that source points at, must outlive the
     * parser.
     * @param status ICU error code.
     */
    static Parser from(StringPiece source, UErrorCode& status) {
        if (U_FAILURE(status)) {
            return Parser();
        }
        umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
        if (U_FAILURE(status)) {
            return Parser();
        }
        return Parser(source);
    }

    /**
     * Parses the whole source identifier.
     *
     * @param status ICU error code; set to kUnitIdentifierSyntaxError if the
     * identifier is malformed.
     * @return The parsed unit. When status reports a failure the returned
     * value may be only partially populated and should not be used.
     */
    MeasureUnitImpl parse(UErrorCode& status) {
        MeasureUnitImpl result;
        parseImpl(result, status);
        return result;
    }

private:
    // Tracks parser progress: the offset into fSource.
    int32_t fIndex = 0;

    // Since we're not owning this memory, whatever is passed to the constructor
    // should live longer than this Parser - and the parser shouldn't return any
    // references to that string.
    StringPiece fSource;
    // Cursor over the shared serialized stem trie; reset at the start of every
    // token.
    UCharsTrie fTrie;

    // Set to true when we've seen a "-per-" or a "per-", after which all units
    // are in the denominator. Until we find an "-and-", at which point the
    // identifier is invalid pending TODO(CLDR-13700).
    bool fAfterPer = false;

    // Constructs an empty parser; used by from() on failure so that a Parser
    // can still be returned by value.
    Parser() : fSource(""), fTrie(u"") {}

    // Constructs a parser over source using the shared stem trie, which must
    // already have been built by initUnitExtras().
    Parser(StringPiece source)
        : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {}

    // Returns true while there is unparsed input left in fSource.
    inline bool hasNext() const {
        return fIndex < fSource.length();
    }

    // Returns the next Token parsed from fSource, advancing fIndex to the end
    // of that token in fSource. In case of U_FAILURE(status), the token
    // returned will cause an abort if getType() is called on it.
    //
    // When no stem matches, status is set to kUnitIdentifierSyntaxError and
    // fIndex is left wherever the scan stopped rather than being restored.
    Token nextToken(UErrorCode& status) {
        fTrie.reset();
        int32_t match = -1;
        // Saves the position in the fSource string for the end of the most
        // recent matching token.
        int32_t previ = -1;
        // Find the longest token that matches a value in the trie:
        while (fIndex < fSource.length()) {
            // Feed the source to the trie one char at a time.
            auto result = fTrie.next(fSource.data()[fIndex++]);
            if (result == USTRINGTRIE_NO_MATCH) {
                break;
            } else if (result == USTRINGTRIE_NO_VALUE) {
                // A valid prefix of some stem, but not a complete stem yet.
                continue;
            }
            U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
            // Remember this match; a longer one may still follow.
            match = fTrie.getValue();
            previ = fIndex;
            if (result == USTRINGTRIE_FINAL_VALUE) {
                // No longer stem can start with what we have consumed.
                break;
            }
            U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
            // continue;
        }

        if (match < 0) {
            status = kUnitIdentifierSyntaxError;
        } else {
            // Rewind to the end of the longest match: the scan may have read
            // past it while looking for a longer stem.
            fIndex = previ;
        }
        return Token(match);
    }

    /**
     * Returns the next "single unit" via result.
     *
     * If a "-per-" was parsed, the result will have appropriate negative
     * dimensionality.
     *
     * Returns an error if we parse both compound units and "-and-", since mixed
     * compound units are not yet supported - TODO(CLDR-13700).
     *
     * @param result Will be overwritten by the result, if status shows success.
     * @param sawAnd If an "-and-" was parsed prior to finding the "single
     * unit", sawAnd is set to true. If not, it is left as is.
     * @param status ICU error code.
     */
    void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) {
        if (U_FAILURE(status)) {
            return;
        }

        // state:
        // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit)
        // 1 = power token seen (will not accept another power token)
        // 2 = SI prefix token seen (will not accept a power or SI prefix token)
        int32_t state = 0;

        // Must be captured before nextToken() advances fIndex.
        bool atStart = fIndex == 0;
        Token token = nextToken(status);
        if (U_FAILURE(status)) { return; }

        if (atStart) {
            // Identifiers optionally start with "per-".
            if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
                U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
                fAfterPer = true;
                result.dimensionality = -1;

                token = nextToken(status);
                if (U_FAILURE(status)) { return; }
            }
        } else {
            // All other SingleUnit's are separated from previous SingleUnit's
            // via a compound part:
            if (token.getType() != Token::TYPE_COMPOUND_PART) {
                status = kUnitIdentifierSyntaxError;
                return;
            }

            switch (token.getMatch()) {
                case COMPOUND_PART_PER:
                    if (sawAnd) {
                        // Mixed compound units not yet supported,
                        // TODO(CLDR-13700).
                        status = kUnitIdentifierSyntaxError;
                        return;
                    }
                    fAfterPer = true;
                    result.dimensionality = -1;
                    break;

                case COMPOUND_PART_TIMES:
                    // Units following a "per" stay in the denominator.
                    if (fAfterPer) {
                        result.dimensionality = -1;
                    }
                    break;

                case COMPOUND_PART_AND:
                    if (fAfterPer) {
                        // Can't start with "-and-", and mixed compound units
                        // not yet supported, TODO(CLDR-13700).
                        status = kUnitIdentifierSyntaxError;
                        return;
                    }
                    sawAnd = true;
                    break;
            }

            token = nextToken(status);
            if (U_FAILURE(status)) { return; }
        }

        // Read tokens until we have a complete SingleUnit or we reach the end.
        while (true) {
            switch (token.getType()) {
                case Token::TYPE_POWER_PART:
                    if (state > 0) {
                        status = kUnitIdentifierSyntaxError;
                        return;
                    }
                    // Multiply so that the sign set by a preceding "per" is
                    // kept.
                    result.dimensionality *= token.getPower();
                    state = 1;
                    break;

                case Token::TYPE_SI_PREFIX:
                    if (state > 1) {
                        status = kUnitIdentifierSyntaxError;
                        return;
                    }
                    result.siPrefix = token.getSIPrefix();
                    state = 2;
                    break;

                case Token::TYPE_SIMPLE_UNIT:
                    // The simple unit completes the single unit.
                    result.index = token.getSimpleUnitIndex();
                    return;

                default:
                    status = kUnitIdentifierSyntaxError;
                    return;
            }

            if (!hasNext()) {
                // We ran out of tokens before finding a complete single unit.
                status = kUnitIdentifierSyntaxError;
                return;
            }
            token = nextToken(status);
            if (U_FAILURE(status)) {
                return;
            }
        }
    }

    /// Parses all of fSource into result, one single unit at a time, and
    /// derives the overall complexity (single, compound or mixed).
    ///
    /// @param result is modified, not overridden. Caller must pass in a
    /// default-constructed (empty) MeasureUnitImpl instance.
    /// @param status ICU error code; set to kUnitIdentifierSyntaxError for a
    /// malformed identifier.
    void parseImpl(MeasureUnitImpl& result, UErrorCode& status) {
        if (U_FAILURE(status)) {
            return;
        }
        if (fSource.empty()) {
            // The dimensionless unit: nothing to parse. leave result as is.
            return;
        }
        int32_t unitNum = 0;
        while (hasNext()) {
            // Reset for every single unit: after the call it tells us whether
            // *this* unit was introduced by "-and-".
            bool sawAnd = false;
            SingleUnitImpl singleUnit;
            nextSingleUnit(singleUnit, sawAnd, status);
            if (U_FAILURE(status)) {
                return;
            }
            U_ASSERT(!singleUnit.isDimensionless());
            bool added = result.append(singleUnit, status);
            if (sawAnd && !added) {
                // Two similar units are not allowed in a mixed unit
                status = kUnitIdentifierSyntaxError;
                return;
            }
            if ((++unitNum) >= 2) {
                // nextSingleUnit fails appropriately for "per" and "and" in the
                // same identifier. It doesn't fail for other compound units
                // (COMPOUND_PART_TIMES). Consequently we take care of that
                // here.
                UMeasureUnitComplexity complexity =
                    sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
                if (unitNum == 2) {
                    // The second unit decides the complexity of the whole
                    // identifier.
                    U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE);
                    result.complexity = complexity;
                } else if (result.complexity != complexity) {
                    // Can't have mixed compound units
                    status = kUnitIdentifierSyntaxError;
                    return;
                }
            }
        }
    }
};
605
606int32_t U_CALLCONV
607compareSingleUnits(const void* /*context*/, const void* left, const void* right) {
608 auto realLeft = static_cast<const SingleUnitImpl* const*>(left);
609 auto realRight = static_cast<const SingleUnitImpl* const*>(right);
610 return (*realLeft)->compareTo(**realRight);
611}
612
613/**
614 * Generate the identifier string for a single unit in place.
615 *
616 * Does not support the dimensionless SingleUnitImpl: calling serializeSingle
617 * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR.
618 *
619 * @param first If singleUnit is part of a compound unit, and not its first
620 * single unit, set this to false. Otherwise: set to true.
621 */
622void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) {
623 if (first && singleUnit.dimensionality < 0) {
624 // Essentially the "unary per". For compound units with a numerator, the
625 // caller takes care of the "binary per".
626 output.append("per-", status);
627 }
628
629 if (singleUnit.isDimensionless()) {
630 status = U_INTERNAL_PROGRAM_ERROR;
631 return;
632 }
633 int8_t posPower = std::abs(singleUnit.dimensionality);
634 if (posPower == 0) {
635 status = U_INTERNAL_PROGRAM_ERROR;
636 } else if (posPower == 1) {
637 // no-op
638 } else if (posPower == 2) {
639 output.append("square-", status);
640 } else if (posPower == 3) {
641 output.append("cubic-", status);
642 } else if (posPower < 10) {
643 output.append('p', status);
644 output.append(posPower + '0', status);
645 output.append('-', status);
646 } else if (posPower <= 15) {
647 output.append("p1", status);
648 output.append('0' + (posPower % 10), status);
649 output.append('-', status);
650 } else {
651 status = kUnitIdentifierSyntaxError;
652 }
653 if (U_FAILURE(status)) {
654 return;
655 }
656
657 if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) {
658 for (const auto& siPrefixInfo : gSIPrefixStrings) {
659 if (siPrefixInfo.value == singleUnit.siPrefix) {
660 output.append(siPrefixInfo.string, status);
661 break;
662 }
663 }
664 }
665 if (U_FAILURE(status)) {
666 return;
667 }
668
669 output.appendInvariantChars(gSimpleUnits[singleUnit.index], status);
670}
671
672/**
673 * Normalize a MeasureUnitImpl and generate the identifier string in place.
674 */
675void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
676 if (U_FAILURE(status)) {
677 return;
678 }
679 U_ASSERT(impl.identifier.isEmpty());
680 if (impl.units.length() == 0) {
681 // Dimensionless, constructed by the default constructor: no appending
682 // to impl.identifier, we wish it to contain the zero-length string.
683 return;
684 }
685 if (impl.complexity == UMEASURE_UNIT_COMPOUND) {
686 // Note: don't sort a MIXED unit
687 uprv_sortArray(
688 impl.units.getAlias(),
689 impl.units.length(),
690 sizeof(impl.units[0]),
691 compareSingleUnits,
692 nullptr,
693 false,
694 &status);
695 if (U_FAILURE(status)) {
696 return;
697 }
698 }
699 serializeSingle(*impl.units[0], true, impl.identifier, status);
700 if (impl.units.length() == 1) {
701 return;
702 }
703 for (int32_t i = 1; i < impl.units.length(); i++) {
704 const SingleUnitImpl& prev = *impl.units[i-1];
705 const SingleUnitImpl& curr = *impl.units[i];
706 if (impl.complexity == UMEASURE_UNIT_MIXED) {
707 impl.identifier.append("-and-", status);
708 serializeSingle(curr, true, impl.identifier, status);
709 } else {
710 if (prev.dimensionality > 0 && curr.dimensionality < 0) {
711 impl.identifier.append("-per-", status);
712 } else {
713 impl.identifier.append('-', status);
714 }
715 serializeSingle(curr, false, impl.identifier, status);
716 }
717 }
718
719}
720
721/**
722 * Appends a SingleUnitImpl to a MeasureUnitImpl.
723 *
724 * @return true if a new item was added. If unit is the dimensionless unit, it
725 * is never added: the return value will always be false.
726 */
727bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) {
728 if (unit.isDimensionless()) {
729 // We don't append dimensionless units.
730 return false;
731 }
732 // Find a similar unit that already exists, to attempt to coalesce
733 SingleUnitImpl* oldUnit = nullptr;
734 for (int32_t i = 0; i < impl.units.length(); i++) {
735 auto* candidate = impl.units[i];
736 if (candidate->isCompatibleWith(unit)) {
737 oldUnit = candidate;
738 }
739 }
740 if (oldUnit) {
741 // Both dimensionalities will be positive, or both will be negative, by
742 // virtue of isCompatibleWith().
743 oldUnit->dimensionality += unit.dimensionality;
744 } else {
745 SingleUnitImpl* destination = impl.units.emplaceBack();
746 if (!destination) {
747 status = U_MEMORY_ALLOCATION_ERROR;
748 return false;
749 }
750 *destination = unit;
751 }
752 return (oldUnit == nullptr);
753}
754
755} // namespace
756
757
758SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
759 MeasureUnitImpl temp;
760 const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);
761 if (U_FAILURE(status)) {
762 return {};
763 }
764 if (impl.units.length() == 0) {
765 return {};
766 }
767 if (impl.units.length() == 1) {
768 return *impl.units[0];
769 }
770 status = U_ILLEGAL_ARGUMENT_ERROR;
771 return {};
772}
773
774MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
775 MeasureUnitImpl temp;
776 temp.append(*this, status);
777 return std::move(temp).build(status);
778}
779
780
781MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
782 return Parser::from(identifier, status).parse(status);
783}
784
785const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(
786 const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) {
787 if (measureUnit.fImpl) {
788 return *measureUnit.fImpl;
789 } else {
790 memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);
791 return memory;
792 }
793}
794
795MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(
796 const MeasureUnit& measureUnit, UErrorCode& status) {
797 if (measureUnit.fImpl) {
798 return measureUnit.fImpl->copy(status);
799 } else {
800 return Parser::from(measureUnit.getIdentifier(), status).parse(status);
801 }
802}
803
804void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
805 identifier.clear();
806 for (int32_t i = 0; i < units.length(); i++) {
807 units[i]->dimensionality *= -1;
808 }
809}
810
811bool MeasureUnitImpl::append(const SingleUnitImpl& singleUnit, UErrorCode& status) {
812 identifier.clear();
813 return appendImpl(*this, singleUnit, status);
814}
815
816MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
817 serialize(*this, status);
818 return MeasureUnit(std::move(*this));
819}
820
821
822MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) {
823 return Parser::from(identifier, status).parse(status).build(status);
824}
825
826UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const {
827 MeasureUnitImpl temp;
828 return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity;
829}
830
831UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const {
832 return SingleUnitImpl::forMeasureUnit(*this, status).siPrefix;
833}
834
835MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const {
836 SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
837 singleUnit.siPrefix = prefix;
838 return singleUnit.build(status);
839}
840
841int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
842 SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
843 if (U_FAILURE(status)) { return 0; }
844 if (singleUnit.isDimensionless()) {
845 return 0;
846 }
847 return singleUnit.dimensionality;
848}
849
850MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
851 SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
852 singleUnit.dimensionality = dimensionality;
853 return singleUnit.build(status);
854}
855
856MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const {
857 MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
858 impl.takeReciprocal(status);
859 return std::move(impl).build(status);
860}
861
862MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const {
863 MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
864 MeasureUnitImpl temp;
865 const MeasureUnitImpl& otherImpl = MeasureUnitImpl::forMeasureUnit(other, temp, status);
866 if (impl.complexity == UMEASURE_UNIT_MIXED || otherImpl.complexity == UMEASURE_UNIT_MIXED) {
867 status = U_ILLEGAL_ARGUMENT_ERROR;
868 return {};
869 }
870 for (int32_t i = 0; i < otherImpl.units.length(); i++) {
871 impl.append(*otherImpl.units[i], status);
872 }
873 if (impl.units.length() > 1) {
874 impl.complexity = UMEASURE_UNIT_COMPOUND;
875 }
876 return std::move(impl).build(status);
877}
878
879LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnits(int32_t& outCount, UErrorCode& status) const {
880 MeasureUnitImpl temp;
881 const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status);
882 outCount = impl.units.length();
883 MeasureUnit* arr = new MeasureUnit[outCount];
884 for (int32_t i = 0; i < outCount; i++) {
885 arr[i] = impl.units[i]->build(status);
886 }
887 return LocalArray<MeasureUnit>(arr, status);
888}
889
890
891U_NAMESPACE_END
892
#endif /* !UCONFIG_NO_FORMATTING */
894