1// © 2019 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3
4// localematcher.h
5// created: 2019may08 Markus W. Scherer
6
7#ifndef __LOCALEMATCHER_H__
8#define __LOCALEMATCHER_H__
9
10#include "unicode/utypes.h"
11
12#if U_SHOW_CPLUSPLUS_API
13
14#include "unicode/locid.h"
15#include "unicode/stringpiece.h"
16#include "unicode/uobject.h"
17
18/**
19 * \file
20 * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
21 */
22
23#ifndef U_FORCE_HIDE_DRAFT_API
24
/**
 * Builder option for whether the language subtag or the script subtag is most important.
 *
 * @see LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag)
 * @draft ICU 65
 */
enum ULocMatchFavorSubtag {
    /**
     * Language differences are most important, then script differences, then region differences.
     * (This is the default behavior.)
     *
     * @draft ICU 65
     */
    ULOCMATCH_FAVOR_LANGUAGE,
    /**
     * Makes script differences matter relatively more than language differences.
     *
     * @draft ICU 65
     */
    ULOCMATCH_FAVOR_SCRIPT
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
#endif

/**
 * Builder option for whether all desired locales are treated equally or
 * earlier ones are preferred.
 *
 * @see LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion)
 * @draft ICU 65
 */
enum ULocMatchDemotion {
    /**
     * All desired locales are treated equally.
     *
     * @draft ICU 65
     */
    ULOCMATCH_DEMOTION_NONE,
    /**
     * Earlier desired locales are preferred.
     *
     * <p>From each desired locale to the next,
     * the distance to any supported locale is increased by an additional amount
     * which is at least as large as most region mismatches.
     * A later desired locale has to have a better match with some supported locale
     * due to more than merely having the same region subtag.
     *
     * <p>For example: <code>Supported={en, sv}  desired=[en-GB, sv]</code>
     * yields <code>Result(en-GB, en)</code> because
     * with the demotion of sv its perfect match is no better than
     * the region distance between the earlier desired locale en-GB and en=en-US.
     *
     * <p>Notes:
     * <ul>
     *   <li>In some cases, language and/or script differences can be as small as
     *       the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
     *   <li>It is possible for certain region differences to be larger than usual,
     *       and larger than the demotion.
     *       (As of CLDR 35 there is no such case, but
     *        this is possible in future versions of the data.)
     * </ul>
     *
     * @draft ICU 65
     */
    ULOCMATCH_DEMOTION_REGION
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchDemotion ULocMatchDemotion;
#endif

/**
 * Builder option for whether to include or ignore one-way (fallback) match data.
 * The LocaleMatcher uses CLDR languageMatch data which includes fallback (oneway=true) entries.
 * Sometimes it is desirable to ignore those.
 *
 * <p>For example, consider a web application with the UI in a given language,
 * with a link to another, related web app.
 * The link should include the UI language, and the target server may also use
 * the client’s Accept-Language header data.
 * The target server has its own list of supported languages.
 * One may want to favor UI language consistency, that is,
 * if there is a decent match for the original UI language, we want to use it,
 * but not if it is merely a fallback.
 *
 * @see LocaleMatcher::Builder::setDirection(ULocMatchDirection)
 * @draft ICU 67
 */
enum ULocMatchDirection {
    /**
     * Locale matching includes one-way matches such as Breton→French. (default)
     *
     * @draft ICU 67
     */
    ULOCMATCH_DIRECTION_WITH_ONE_WAY,
    /**
     * Locale matching limited to two-way matches including e.g. Danish↔Norwegian
     * but ignoring one-way matches.
     *
     * @draft ICU 67
     */
    ULOCMATCH_DIRECTION_ONLY_TWO_WAY
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchDirection ULocMatchDirection;
#endif

132struct UHashtable;
133
134U_NAMESPACE_BEGIN
135
136struct LSR;
137
138class LocaleDistance;
139class LocaleLsrIterator;
140class UVector;
141class XLikelySubtags;
142
143/**
144 * Immutable class that picks the best match between a user's desired locales and
145 * an application's supported locales.
146 * Movable but not copyable.
147 *
148 * <p>Example:
149 * <pre>
150 * UErrorCode errorCode = U_ZERO_ERROR;
151 * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocales("fr, en-GB, en").build(errorCode);
152 * Locale *bestSupported = matcher.getBestLocale(Locale.US, errorCode); // "en"
153 * </pre>
154 *
155 * <p>A matcher takes into account when languages are close to one another,
156 * such as Danish and Norwegian,
157 * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
158 *
159 * <p>If there are multiple supported locales with the same (language, script, region)
160 * likely subtags, then the current implementation returns the first of those locales.
161 * It ignores variant subtags (except for pseudolocale variants) and extensions.
162 * This may change in future versions.
163 *
164 * <p>For example, the current implementation does not distinguish between
165 * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
166 *
167 * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
168 * or place it earlier in the list of supported locales.
169 *
170 * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
171 * The current implementation compares each desired locale with supported locales
172 * in the following order:
173 * 1. Default locale, if supported;
174 * 2. CLDR "paradigm locales" like en-GB and es-419;
175 * 3. other supported locales.
176 * This may change in future versions.
177 *
178 * <p>Often a product will just need one matcher instance, built with the languages
179 * that it supports. However, it may want multiple instances with different
180 * default languages based on additional information, such as the domain.
181 *
182 * <p>This class is not intended for public subclassing.
183 *
184 * @draft ICU 65
185 */
186class U_COMMON_API LocaleMatcher : public UMemory {
187public:
188 /**
189 * Data for the best-matching pair of a desired and a supported locale.
190 * Movable but not copyable.
191 *
192 * @draft ICU 65
193 */
194 class U_COMMON_API Result : public UMemory {
195 public:
196 /**
197 * Move constructor; might modify the source.
198 * This object will have the same contents that the source object had.
199 *
200 * @param src Result to move contents from.
201 * @draft ICU 65
202 */
203 Result(Result &&src) U_NOEXCEPT;
204
205 /**
206 * Destructor.
207 *
208 * @draft ICU 65
209 */
210 ~Result();
211
212 /**
213 * Move assignment; might modify the source.
214 * This object will have the same contents that the source object had.
215 *
216 * @param src Result to move contents from.
217 * @draft ICU 65
218 */
219 Result &operator=(Result &&src) U_NOEXCEPT;
220
221#ifndef U_HIDE_DRAFT_API
222 /**
223 * Returns the best-matching desired locale.
224 * nullptr if the list of desired locales is empty or if none matched well enough.
225 *
226 * @return the best-matching desired locale, or nullptr.
227 * @draft ICU 65
228 */
229 inline const Locale *getDesiredLocale() const { return desiredLocale; }
230
231 /**
232 * Returns the best-matching supported locale.
233 * If none matched well enough, this is the default locale.
234 * The default locale is nullptr if the list of supported locales is empty and
235 * no explicit default locale is set.
236 *
237 * @return the best-matching supported locale, or nullptr.
238 * @draft ICU 65
239 */
240 inline const Locale *getSupportedLocale() const { return supportedLocale; }
241
242 /**
243 * Returns the index of the best-matching desired locale in the input Iterable order.
244 * -1 if the list of desired locales is empty or if none matched well enough.
245 *
246 * @return the index of the best-matching desired locale, or -1.
247 * @draft ICU 65
248 */
249 inline int32_t getDesiredIndex() const { return desiredIndex; }
250
251 /**
252 * Returns the index of the best-matching supported locale in the
253 * constructor’s or builder’s input order (“set” Collection plus “added” locales).
254 * If the matcher was built from a locale list string, then the iteration order is that
255 * of a LocalePriorityList built from the same string.
256 * -1 if the list of supported locales is empty or if none matched well enough.
257 *
258 * @return the index of the best-matching supported locale, or -1.
259 * @draft ICU 65
260 */
261 inline int32_t getSupportedIndex() const { return supportedIndex; }
262
263 /**
264 * Takes the best-matching supported locale and adds relevant fields of the
265 * best-matching desired locale, such as the -t- and -u- extensions.
266 * May replace some fields of the supported locale.
267 * The result is the locale that should be used for date and number formatting, collation, etc.
268 * Returns the root locale if getSupportedLocale() returns nullptr.
269 *
270 * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
271 *
272 * @return a locale combining the best-matching desired and supported locales.
273 * @draft ICU 65
274 */
275 Locale makeResolvedLocale(UErrorCode &errorCode) const;
276#endif // U_HIDE_DRAFT_API
277
278 private:
279 Result(const Locale *desired, const Locale *supported,
280 int32_t desIndex, int32_t suppIndex, UBool owned) :
281 desiredLocale(desired), supportedLocale(supported),
282 desiredIndex(desIndex), supportedIndex(suppIndex),
283 desiredIsOwned(owned) {}
284
285 Result(const Result &other) = delete;
286 Result &operator=(const Result &other) = delete;
287
288 const Locale *desiredLocale;
289 const Locale *supportedLocale;
290 int32_t desiredIndex;
291 int32_t supportedIndex;
292 UBool desiredIsOwned;
293
294 friend class LocaleMatcher;
295 };
296
297 /**
298 * LocaleMatcher builder.
299 * Movable but not copyable.
300 *
301 * @see LocaleMatcher#builder()
302 * @draft ICU 65
303 */
304 class U_COMMON_API Builder : public UMemory {
305 public:
306 /**
307 * Constructs a builder used in chaining parameters for building a LocaleMatcher.
308 *
309 * @return a new Builder object
310 * @draft ICU 65
311 */
312 Builder() {}
313
314 /**
315 * Move constructor; might modify the source.
316 * This builder will have the same contents that the source builder had.
317 *
318 * @param src Builder to move contents from.
319 * @draft ICU 65
320 */
321 Builder(Builder &&src) U_NOEXCEPT;
322
323 /**
324 * Destructor.
325 *
326 * @draft ICU 65
327 */
328 ~Builder();
329
330 /**
331 * Move assignment; might modify the source.
332 * This builder will have the same contents that the source builder had.
333 *
334 * @param src Builder to move contents from.
335 * @draft ICU 65
336 */
337 Builder &operator=(Builder &&src) U_NOEXCEPT;
338
339#ifndef U_HIDE_DRAFT_API
340 /**
341 * Parses an Accept-Language string
342 * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
343 * such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
344 * Allows whitespace in more places but does not allow "*".
345 * Clears any previously set/added supported locales first.
346 *
347 * @param locales the Accept-Language string of locales to set
348 * @return this Builder object
349 * @draft ICU 65
350 */
351 Builder &setSupportedLocalesFromListString(StringPiece locales);
352
353 /**
354 * Copies the supported locales, preserving iteration order.
355 * Clears any previously set/added supported locales first.
356 * Duplicates are allowed, and are not removed.
357 *
358 * @param locales the list of locale
359 * @return this Builder object
360 * @draft ICU 65
361 */
362 Builder &setSupportedLocales(Locale::Iterator &locales);
363
364 /**
365 * Copies the supported locales from the begin/end range, preserving iteration order.
366 * Clears any previously set/added supported locales first.
367 * Duplicates are allowed, and are not removed.
368 *
369 * Each of the iterator parameter values must be an
370 * input iterator whose value is convertible to const Locale &.
371 *
372 * @param begin Start of range.
373 * @param end Exclusive end of range.
374 * @return this Builder object
375 * @draft ICU 65
376 */
377 template<typename Iter>
378 Builder &setSupportedLocales(Iter begin, Iter end) {
379 if (U_FAILURE(errorCode_)) { return *this; }
380 clearSupportedLocales();
381 while (begin != end) {
382 addSupportedLocale(*begin++);
383 }
384 return *this;
385 }
386
387 /**
388 * Copies the supported locales from the begin/end range, preserving iteration order.
389 * Calls the converter to convert each *begin to a Locale or const Locale &.
390 * Clears any previously set/added supported locales first.
391 * Duplicates are allowed, and are not removed.
392 *
393 * Each of the iterator parameter values must be an
394 * input iterator whose value is convertible to const Locale &.
395 *
396 * @param begin Start of range.
397 * @param end Exclusive end of range.
398 * @param converter Converter from *begin to const Locale & or compatible.
399 * @return this Builder object
400 * @draft ICU 65
401 */
402 template<typename Iter, typename Conv>
403 Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {
404 if (U_FAILURE(errorCode_)) { return *this; }
405 clearSupportedLocales();
406 while (begin != end) {
407 addSupportedLocale(converter(*begin++));
408 }
409 return *this;
410 }
411
412 /**
413 * Adds another supported locale.
414 * Duplicates are allowed, and are not removed.
415 *
416 * @param locale another locale
417 * @return this Builder object
418 * @draft ICU 65
419 */
420 Builder &addSupportedLocale(const Locale &locale);
421
422 /**
423 * Sets the default locale; if nullptr, or if it is not set explicitly,
424 * then the first supported locale is used as the default locale.
425 *
426 * @param defaultLocale the default locale (will be copied)
427 * @return this Builder object
428 * @draft ICU 65
429 */
430 Builder &setDefaultLocale(const Locale *defaultLocale);
431
432 /**
433 * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script
434 * differences.
435 * This is used in situations (such as maps) where
436 * it is better to fall back to the same script than a similar language.
437 *
438 * @param subtag the subtag to favor
439 * @return this Builder object
440 * @draft ICU 65
441 */
442 Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);
443
444 /**
445 * Option for whether all desired locales are treated equally or
446 * earlier ones are preferred (this is the default).
447 *
448 * @param demotion the demotion per desired locale to set.
449 * @return this Builder object
450 * @draft ICU 65
451 */
452 Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
453
454 /**
455 * Option for whether to include or ignore one-way (fallback) match data.
456 * By default, they are included.
457 *
458 * @param direction the match direction to set.
459 * @return this Builder object
460 * @draft ICU 67
461 */
462 Builder &setDirection(ULocMatchDirection direction) {
463 if (U_SUCCESS(errorCode_)) {
464 direction_ = direction;
465 }
466 return *this;
467 }
468
469 /**
470 * Sets the UErrorCode if an error occurred while setting parameters.
471 * Preserves older error codes in the outErrorCode.
472 *
473 * @param outErrorCode Set to an error code if it does not contain one already
474 * and an error occurred while setting parameters.
475 * Otherwise unchanged.
476 * @return TRUE if U_FAILURE(outErrorCode)
477 * @draft ICU 65
478 */
479 UBool copyErrorTo(UErrorCode &outErrorCode) const;
480
481 /**
482 * Builds and returns a new locale matcher.
483 * This builder can continue to be used.
484 *
485 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
486 * or else the function returns immediately. Check for U_FAILURE()
487 * on output or use with function chaining. (See User Guide for details.)
488 * @return new LocaleMatcher.
489 * @draft ICU 65
490 */
491 LocaleMatcher build(UErrorCode &errorCode) const;
492#endif // U_HIDE_DRAFT_API
493
494 private:
495 friend class LocaleMatcher;
496
497 Builder(const Builder &other) = delete;
498 Builder &operator=(const Builder &other) = delete;
499
500 void clearSupportedLocales();
501 bool ensureSupportedLocaleVector();
502
503 UErrorCode errorCode_ = U_ZERO_ERROR;
504 UVector *supportedLocales_ = nullptr;
505 int32_t thresholdDistance_ = -1;
506 ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION;
507 Locale *defaultLocale_ = nullptr;
508 ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
509 ULocMatchDirection direction_ = ULOCMATCH_DIRECTION_WITH_ONE_WAY;
510 };
511
512 // FYI No public LocaleMatcher constructors in C++; use the Builder.
513
514 /**
515 * Move copy constructor; might modify the source.
516 * This matcher will have the same settings that the source matcher had.
517 * @param src source matcher
518 * @draft ICU 65
519 */
520 LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT;
521
522 /**
523 * Destructor.
524 * @draft ICU 65
525 */
526 ~LocaleMatcher();
527
528 /**
529 * Move assignment operator; might modify the source.
530 * This matcher will have the same settings that the source matcher had.
531 * The behavior is undefined if *this and src are the same object.
532 * @param src source matcher
533 * @return *this
534 * @draft ICU 65
535 */
536 LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT;
537
538#ifndef U_HIDE_DRAFT_API
539 /**
540 * Returns the supported locale which best matches the desired locale.
541 *
542 * @param desiredLocale Typically a user's language.
543 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
544 * or else the function returns immediately. Check for U_FAILURE()
545 * on output or use with function chaining. (See User Guide for details.)
546 * @return the best-matching supported locale.
547 * @draft ICU 65
548 */
549 const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;
550
551 /**
552 * Returns the supported locale which best matches one of the desired locales.
553 *
554 * @param desiredLocales Typically a user's languages, in order of preference (descending).
555 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
556 * or else the function returns immediately. Check for U_FAILURE()
557 * on output or use with function chaining. (See User Guide for details.)
558 * @return the best-matching supported locale.
559 * @draft ICU 65
560 */
561 const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
562
563 /**
564 * Parses an Accept-Language string
565 * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
566 * such as "af, en, fr;q=0.9",
567 * and returns the supported locale which best matches one of the desired locales.
568 * Allows whitespace in more places but does not allow "*".
569 *
570 * @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
571 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
572 * or else the function returns immediately. Check for U_FAILURE()
573 * on output or use with function chaining. (See User Guide for details.)
574 * @return the best-matching supported locale.
575 * @draft ICU 65
576 */
577 const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;
578
579 /**
580 * Returns the best match between the desired locale and the supported locales.
581 * If the result's desired locale is not nullptr, then it is the address of the input locale.
582 * It has not been cloned.
583 *
584 * @param desiredLocale Typically a user's language.
585 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
586 * or else the function returns immediately. Check for U_FAILURE()
587 * on output or use with function chaining. (See User Guide for details.)
588 * @return the best-matching pair of the desired and a supported locale.
589 * @draft ICU 65
590 */
591 Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;
592
593 /**
594 * Returns the best match between the desired and supported locales.
595 * If the result's desired locale is not nullptr, then it is a clone of
596 * the best-matching desired locale. The Result object owns the clone.
597 *
598 * @param desiredLocales Typically a user's languages, in order of preference (descending).
599 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
600 * or else the function returns immediately. Check for U_FAILURE()
601 * on output or use with function chaining. (See User Guide for details.)
602 * @return the best-matching pair of a desired and a supported locale.
603 * @draft ICU 65
604 */
605 Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
606#endif // U_HIDE_DRAFT_API
607
608#ifndef U_HIDE_INTERNAL_API
609 /**
610 * Returns a fraction between 0 and 1, where 1 means that the languages are a
611 * perfect match, and 0 means that they are completely different.
612 *
613 * <p>This is mostly an implementation detail, and the precise values may change over time.
614 * The implementation may use either the maximized forms or the others ones, or both.
615 * The implementation may or may not rely on the forms to be consistent with each other.
616 *
617 * <p>Callers should construct and use a matcher rather than match pairs of locales directly.
618 *
619 * @param desired Desired locale.
620 * @param supported Supported locale.
621 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
622 * or else the function returns immediately. Check for U_FAILURE()
623 * on output or use with function chaining. (See User Guide for details.)
624 * @return value between 0 and 1, inclusive.
625 * @internal (has a known user)
626 */
627 double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
628#endif // U_HIDE_INTERNAL_API
629
630private:
631 LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
632 LocaleMatcher(const LocaleMatcher &other) = delete;
633 LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
634
635 int32_t putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode);
636
637 int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
638
639 const XLikelySubtags &likelySubtags;
640 const LocaleDistance &localeDistance;
641 int32_t thresholdDistance;
642 int32_t demotionPerDesiredLocale;
643 ULocMatchFavorSubtag favorSubtag;
644 ULocMatchDirection direction;
645
646 // These are in input order.
647 const Locale ** supportedLocales;
648 LSR *lsrs;
649 int32_t supportedLocalesLength;
650 // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
651 UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found"
652 // Array versions of the supportedLsrToIndex keys and values.
653 // The distance lookup loops over the supportedLSRs and returns the index of the best match.
654 const LSR **supportedLSRs;
655 int32_t *supportedIndexes;
656 int32_t supportedLSRsLength;
657 Locale *ownedDefaultLocale;
658 const Locale *defaultLocale;
659};
660
661U_NAMESPACE_END
662
663#endif // U_FORCE_HIDE_DRAFT_API
664#endif // U_SHOW_CPLUSPLUS_API
665#endif // __LOCALEMATCHER_H__
666