1// © 2019 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3
4// localematcher.h
5// created: 2019may08 Markus W. Scherer
6
7#ifndef __LOCALEMATCHER_H__
8#define __LOCALEMATCHER_H__
9
10#include "unicode/utypes.h"
11
12#if U_SHOW_CPLUSPLUS_API
13
14#include "unicode/locid.h"
15#include "unicode/stringpiece.h"
16#include "unicode/uobject.h"
17
18/**
19 * \file
20 * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
21 */
22
23#ifndef U_HIDE_DRAFT_API
24
/**
 * Builder option that selects which subtag carries the most weight during matching:
 * the language subtag or the script subtag.
 *
 * @see LocaleMatcher::Builder#setFavorSubtag(ULocMatchFavorSubtag)
 * @draft ICU 65
 */
enum ULocMatchFavorSubtag {
    /**
     * Language differences are most important, then script differences,
     * then region differences.
     * (This is the default behavior.)
     *
     * @draft ICU 65
     */
    ULOCMATCH_FAVOR_LANGUAGE = 0,
    /**
     * Makes script differences matter relatively more than language differences.
     *
     * @draft ICU 65
     */
    ULOCMATCH_FAVOR_SCRIPT = 1
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
#endif
49
/**
 * Builder option that controls whether every desired locale is weighted equally or
 * whether earlier desired locales take precedence over later ones.
 *
 * @see LocaleMatcher::Builder#setDemotionPerDesiredLocale(ULocMatchDemotion)
 * @draft ICU 65
 */
enum ULocMatchDemotion {
    /**
     * All desired locales are treated equally.
     *
     * @draft ICU 65
     */
    ULOCMATCH_DEMOTION_NONE = 0,
    /**
     * Earlier desired locales are preferred.
     *
     * <p>From each desired locale to the next,
     * the distance to any supported locale is increased by an additional amount
     * which is at least as large as most region mismatches.
     * A later desired locale therefore has to match some supported locale better
     * by more than merely having the same region subtag.
     *
     * <p>For example: <code>Supported={en, sv}  desired=[en-GB, sv]</code>
     * yields <code>Result(en-GB, en)</code> because,
     * with the demotion of sv, its perfect match is no better than
     * the region distance between the earlier desired locale en-GB and en=en-US.
     *
     * <p>Notes:
     * <ul>
     *   <li>In some cases, language and/or script differences can be as small as
     *       the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
     *   <li>It is possible for certain region differences to be larger than usual,
     *       and larger than the demotion.
     *       (As of CLDR 35 there is no such case, but
     *        this is possible in future versions of the data.)
     * </ul>
     *
     * @draft ICU 65
     */
    ULOCMATCH_DEMOTION_REGION = 1
};
#ifndef U_IN_DOXYGEN
typedef enum ULocMatchDemotion ULocMatchDemotion;
#endif
95
96struct UHashtable;
97
98U_NAMESPACE_BEGIN
99
100struct LSR;
101
102class LocaleDistance;
103class LocaleLsrIterator;
104class UVector;
105class XLikelySubtags;
106
107/**
108 * Immutable class that picks the best match between a user's desired locales and
109 * an application's supported locales.
110 * Movable but not copyable.
111 *
112 * <p>Example:
113 * <pre>
114 * UErrorCode errorCode = U_ZERO_ERROR;
 * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocalesFromListString("fr, en-GB, en").build(errorCode);
 * const Locale *bestSupported = matcher.getBestMatch(Locale::getUS(), errorCode);  // "en"
117 * </pre>
118 *
119 * <p>A matcher takes into account when languages are close to one another,
120 * such as Danish and Norwegian,
121 * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
122 *
123 * <p>If there are multiple supported locales with the same (language, script, region)
124 * likely subtags, then the current implementation returns the first of those locales.
125 * It ignores variant subtags (except for pseudolocale variants) and extensions.
126 * This may change in future versions.
127 *
128 * <p>For example, the current implementation does not distinguish between
129 * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
130 *
131 * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
132 * or place it earlier in the list of supported locales.
133 *
134 * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
135 * The current implementation compares each desired locale with supported locales
136 * in the following order:
137 * 1. Default locale, if supported;
138 * 2. CLDR "paradigm locales" like en-GB and es-419;
139 * 3. other supported locales.
140 * This may change in future versions.
141 *
142 * <p>Often a product will just need one matcher instance, built with the languages
143 * that it supports. However, it may want multiple instances with different
144 * default languages based on additional information, such as the domain.
145 *
146 * <p>This class is not intended for public subclassing.
147 *
148 * @draft ICU 65
149 */
150class U_COMMON_API LocaleMatcher : public UMemory {
151public:
152 /**
153 * Data for the best-matching pair of a desired and a supported locale.
154 * Movable but not copyable.
155 *
156 * @draft ICU 65
157 */
158 class U_COMMON_API Result : public UMemory {
159 public:
160 /**
161 * Move constructor; might modify the source.
162 * This object will have the same contents that the source object had.
163 *
164 * @param src Result to move contents from.
165 * @draft ICU 65
166 */
167 Result(Result &&src) U_NOEXCEPT;
168
169 /**
170 * Destructor.
171 *
172 * @draft ICU 65
173 */
174 ~Result();
175
176 /**
177 * Move assignment; might modify the source.
178 * This object will have the same contents that the source object had.
179 *
180 * @param src Result to move contents from.
181 * @draft ICU 65
182 */
183 Result &operator=(Result &&src) U_NOEXCEPT;
184
185 /**
186 * Returns the best-matching desired locale.
187 * nullptr if the list of desired locales is empty or if none matched well enough.
188 *
189 * @return the best-matching desired locale, or nullptr.
190 * @draft ICU 65
191 */
192 inline const Locale *getDesiredLocale() const { return desiredLocale; }
193
194 /**
195 * Returns the best-matching supported locale.
196 * If none matched well enough, this is the default locale.
197 * The default locale is nullptr if the list of supported locales is empty and
198 * no explicit default locale is set.
199 *
200 * @return the best-matching supported locale, or nullptr.
201 * @draft ICU 65
202 */
203 inline const Locale *getSupportedLocale() const { return supportedLocale; }
204
205 /**
206 * Returns the index of the best-matching desired locale in the input Iterable order.
207 * -1 if the list of desired locales is empty or if none matched well enough.
208 *
209 * @return the index of the best-matching desired locale, or -1.
210 * @draft ICU 65
211 */
212 inline int32_t getDesiredIndex() const { return desiredIndex; }
213
214 /**
215 * Returns the index of the best-matching supported locale in the
216 * constructor’s or builder’s input order (“set” Collection plus “added” locales).
217 * If the matcher was built from a locale list string, then the iteration order is that
218 * of a LocalePriorityList built from the same string.
219 * -1 if the list of supported locales is empty or if none matched well enough.
220 *
221 * @return the index of the best-matching supported locale, or -1.
222 * @draft ICU 65
223 */
224 inline int32_t getSupportedIndex() const { return supportedIndex; }
225
226 /**
227 * Takes the best-matching supported locale and adds relevant fields of the
228 * best-matching desired locale, such as the -t- and -u- extensions.
229 * May replace some fields of the supported locale.
230 * The result is the locale that should be used for date and number formatting, collation, etc.
231 * Returns the root locale if getSupportedLocale() returns nullptr.
232 *
233 * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
234 *
235 * @return a locale combining the best-matching desired and supported locales.
236 * @draft ICU 65
237 */
238 Locale makeResolvedLocale(UErrorCode &errorCode) const;
239
240 private:
241 Result(const Locale *desired, const Locale *supported,
242 int32_t desIndex, int32_t suppIndex, UBool owned) :
243 desiredLocale(desired), supportedLocale(supported),
244 desiredIndex(desIndex), supportedIndex(suppIndex),
245 desiredIsOwned(owned) {}
246
247 Result(const Result &other) = delete;
248 Result &operator=(const Result &other) = delete;
249
250 const Locale *desiredLocale;
251 const Locale *supportedLocale;
252 int32_t desiredIndex;
253 int32_t supportedIndex;
254 UBool desiredIsOwned;
255
256 friend class LocaleMatcher;
257 };
258
259 /**
260 * LocaleMatcher builder.
261 * Movable but not copyable.
262 *
263 * @see LocaleMatcher#builder()
264 * @draft ICU 65
265 */
266 class U_COMMON_API Builder : public UMemory {
267 public:
268 /**
269 * Constructs a builder used in chaining parameters for building a LocaleMatcher.
270 *
271 * @return a new Builder object
272 * @draft ICU 65
273 */
274 Builder() {}
275
276 /**
277 * Move constructor; might modify the source.
278 * This builder will have the same contents that the source builder had.
279 *
280 * @param src Builder to move contents from.
281 * @draft ICU 65
282 */
283 Builder(Builder &&src) U_NOEXCEPT;
284
285 /**
286 * Destructor.
287 *
288 * @draft ICU 65
289 */
290 ~Builder();
291
292 /**
293 * Move assignment; might modify the source.
294 * This builder will have the same contents that the source builder had.
295 *
296 * @param src Builder to move contents from.
297 * @draft ICU 65
298 */
299 Builder &operator=(Builder &&src) U_NOEXCEPT;
300
301 /**
302 * Parses an Accept-Language string
303 * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
304 * such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
305 * Allows whitespace in more places but does not allow "*".
306 * Clears any previously set/added supported locales first.
307 *
308 * @param locales the Accept-Language string of locales to set
309 * @return this Builder object
310 * @draft ICU 65
311 */
312 Builder &setSupportedLocalesFromListString(StringPiece locales);
313
314 /**
315 * Copies the supported locales, preserving iteration order.
316 * Clears any previously set/added supported locales first.
317 * Duplicates are allowed, and are not removed.
318 *
319 * @param locales the list of locale
320 * @return this Builder object
321 * @draft ICU 65
322 */
323 Builder &setSupportedLocales(Locale::Iterator &locales);
324
325 /**
326 * Copies the supported locales from the begin/end range, preserving iteration order.
327 * Clears any previously set/added supported locales first.
328 * Duplicates are allowed, and are not removed.
329 *
330 * Each of the iterator parameter values must be an
331 * input iterator whose value is convertible to const Locale &.
332 *
333 * @param begin Start of range.
334 * @param end Exclusive end of range.
335 * @return this Builder object
336 * @draft ICU 65
337 */
338 template<typename Iter>
339 Builder &setSupportedLocales(Iter begin, Iter end) {
340 if (U_FAILURE(errorCode_)) { return *this; }
341 clearSupportedLocales();
342 while (begin != end) {
343 addSupportedLocale(*begin++);
344 }
345 return *this;
346 }
347
348 /**
349 * Copies the supported locales from the begin/end range, preserving iteration order.
350 * Calls the converter to convert each *begin to a Locale or const Locale &.
351 * Clears any previously set/added supported locales first.
352 * Duplicates are allowed, and are not removed.
353 *
354 * Each of the iterator parameter values must be an
355 * input iterator whose value is convertible to const Locale &.
356 *
357 * @param begin Start of range.
358 * @param end Exclusive end of range.
359 * @param converter Converter from *begin to const Locale & or compatible.
360 * @return this Builder object
361 * @draft ICU 65
362 */
363 template<typename Iter, typename Conv>
364 Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {
365 if (U_FAILURE(errorCode_)) { return *this; }
366 clearSupportedLocales();
367 while (begin != end) {
368 addSupportedLocale(converter(*begin++));
369 }
370 return *this;
371 }
372
373 /**
374 * Adds another supported locale.
375 * Duplicates are allowed, and are not removed.
376 *
377 * @param locale another locale
378 * @return this Builder object
379 * @draft ICU 65
380 */
381 Builder &addSupportedLocale(const Locale &locale);
382
383 /**
384 * Sets the default locale; if nullptr, or if it is not set explicitly,
385 * then the first supported locale is used as the default locale.
386 *
387 * @param defaultLocale the default locale (will be copied)
388 * @return this Builder object
389 * @draft ICU 65
390 */
391 Builder &setDefaultLocale(const Locale *defaultLocale);
392
393 /**
394 * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script
395 * differences.
396 * This is used in situations (such as maps) where
397 * it is better to fall back to the same script than a similar language.
398 *
399 * @param subtag the subtag to favor
400 * @return this Builder object
401 * @draft ICU 65
402 */
403 Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);
404
405 /**
406 * Option for whether all desired locales are treated equally or
407 * earlier ones are preferred (this is the default).
408 *
409 * @param demotion the demotion per desired locale to set.
410 * @return this Builder object
411 * @draft ICU 65
412 */
413 Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
414
415 /**
416 * Sets the UErrorCode if an error occurred while setting parameters.
417 * Preserves older error codes in the outErrorCode.
418 *
419 * @param outErrorCode Set to an error code if it does not contain one already
420 * and an error occurred while setting parameters.
421 * Otherwise unchanged.
422 * @return TRUE if U_FAILURE(outErrorCode)
423 * @draft ICU 65
424 */
425 UBool copyErrorTo(UErrorCode &outErrorCode) const;
426
427 /**
428 * Builds and returns a new locale matcher.
429 * This builder can continue to be used.
430 *
431 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
432 * or else the function returns immediately. Check for U_FAILURE()
433 * on output or use with function chaining. (See User Guide for details.)
434 * @return new LocaleMatcher.
435 * @draft ICU 65
436 */
437 LocaleMatcher build(UErrorCode &errorCode) const;
438
439 private:
440 friend class LocaleMatcher;
441
442 Builder(const Builder &other) = delete;
443 Builder &operator=(const Builder &other) = delete;
444
445 void clearSupportedLocales();
446 bool ensureSupportedLocaleVector();
447
448 UErrorCode errorCode_ = U_ZERO_ERROR;
449 UVector *supportedLocales_ = nullptr;
450 int32_t thresholdDistance_ = -1;
451 ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION;
452 Locale *defaultLocale_ = nullptr;
453 ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
454 };
455
456 // FYI No public LocaleMatcher constructors in C++; use the Builder.
457
/**
 * Move constructor; might modify the source.
 * This matcher will have the same settings that the source matcher had.
 * @param src source matcher to move the settings from
 * @draft ICU 65
 */
LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT;
465
/**
 * Destructor.
 * Only declared here; the definition lives outside this header.
 * @draft ICU 65
 */
~LocaleMatcher();
471
/**
 * Move assignment operator; might modify the source.
 * This matcher will have the same settings that the source matcher had.
 * The behavior is undefined if *this and src are the same object.
 * (Copy assignment is not available; it is deleted in the private section.)
 * @param src source matcher to move the settings from
 * @return *this
 * @draft ICU 65
 */
LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT;
481
/**
 * Returns the supported locale which best matches the desired locale.
 *
 * NOTE(review): the result is returned as a pointer to const; presumably it points to
 * a locale held by this matcher and can be nullptr when there is no supported and
 * no default locale (compare Result::getSupportedLocale()) -- confirm in the implementation.
 *
 * @param desiredLocale Typically a user's language.
 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
 *                  or else the function returns immediately. Check for U_FAILURE()
 *                  on output or use with function chaining. (See User Guide for details.)
 * @return the best-matching supported locale.
 * @draft ICU 65
 */
const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;
493
/**
 * Returns the supported locale which best matches one of the desired locales.
 *
 * NOTE(review): the iterator is taken by non-const reference, so it is presumably
 * advanced (consumed) by this call -- confirm in the implementation.
 *
 * @param desiredLocales Typically a user's languages, in order of preference (descending).
 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
 *                  or else the function returns immediately. Check for U_FAILURE()
 *                  on output or use with function chaining. (See User Guide for details.)
 * @return the best-matching supported locale.
 * @draft ICU 65
 */
const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
505
/**
 * Parses an Accept-Language string
 * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
 * such as "af, en, fr;q=0.9",
 * and returns the supported locale which best matches one of the desired locales.
 * Allows whitespace in more places but does not allow "*".
 * (Same list syntax as Builder::setSupportedLocalesFromListString().)
 *
 * @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
 *                  or else the function returns immediately. Check for U_FAILURE()
 *                  on output or use with function chaining. (See User Guide for details.)
 * @return the best-matching supported locale.
 * @draft ICU 65
 */
const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;
521
/**
 * Returns the best match between the desired locale and the supported locales.
 * If the result's desired locale is not nullptr, then it is the address of the input locale.
 * It has not been cloned, so the input locale needs to remain valid
 * for as long as the Result's desired locale is used.
 *
 * @param desiredLocale Typically a user's language.
 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
 *                  or else the function returns immediately. Check for U_FAILURE()
 *                  on output or use with function chaining. (See User Guide for details.)
 * @return the best-matching pair of the desired and a supported locale.
 * @draft ICU 65
 */
Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;
535
/**
 * Returns the best match between the desired and supported locales.
 * If the result's desired locale is not nullptr, then it is a clone of
 * the best-matching desired locale. The Result object owns the clone.
 * (This differs from the single-locale overload, whose Result points at the input locale.)
 *
 * @param desiredLocales Typically a user's languages, in order of preference (descending).
 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
 *                  or else the function returns immediately. Check for U_FAILURE()
 *                  on output or use with function chaining. (See User Guide for details.)
 * @return the best-matching pair of a desired and a supported locale.
 * @draft ICU 65
 */
Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
549
#ifndef U_HIDE_INTERNAL_API
/**
 * Returns a fraction between 0 and 1, where 1 means that the languages are a
 * perfect match, and 0 means that they are completely different.
 *
 * <p>This is mostly an implementation detail, and the precise values may change over time.
 * The implementation may use either the maximized forms or the other ones, or both.
 * The implementation may or may not rely on the forms to be consistent with each other.
 *
 * <p>Callers should construct and use a matcher rather than match pairs of locales directly.
 *
 * @param desired Desired locale.
 * @param supported Supported locale.
 * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
 *                  or else the function returns immediately. Check for U_FAILURE()
 *                  on output or use with function chaining. (See User Guide for details.)
 * @return value between 0 and 1, inclusive.
 * @internal (has a known user)
 */
double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
#endif  // U_HIDE_INTERNAL_API
571
private:
    // Private constructor taking the builder's settings.
    // NOTE(review): presumably the constructor that Builder::build() uses -- confirm.
    LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
    // Not copyable (movable only).
    LocaleMatcher(const LocaleMatcher &other) = delete;
    LocaleMatcher &operator=(const LocaleMatcher &other) = delete;

    // Helper that yields an index into the supported locales for a desired LSR.
    // NOTE(review): judging by the name and parameters, remainingIter supplies further
    // desired locales to consider -- confirm against the implementation.
    int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;

    // Held by reference, so these cannot be re-bound after construction.
    const XLikelySubtags &likelySubtags;
    const LocaleDistance &localeDistance;
    // Matching options.
    int32_t thresholdDistance;
    int32_t demotionPerDesiredLocale;
    ULocMatchFavorSubtag favorSubtag;

    // These are in input order.
    const Locale ** supportedLocales;
    LSR *lsrs;
    int32_t supportedLocalesLength;
    // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
    UHashtable *supportedLsrToIndex;  // Map<LSR, Integer> stores index+1 because 0 is "not found"
    // Array versions of the supportedLsrToIndex keys and values.
    // The distance lookup loops over the supportedLSRs and returns the index of the best match.
    const LSR **supportedLSRs;
    int32_t *supportedIndexes;
    int32_t supportedLSRsLength;
    // NOTE(review): presumably ownedDefaultLocale is the matcher-owned copy of an explicitly
    // set default locale, while defaultLocale points at whichever locale is in effect -- confirm.
    Locale *ownedDefaultLocale;
    const Locale *defaultLocale;
    int32_t defaultLocaleIndex;
599};
600
601U_NAMESPACE_END
602
603#endif // U_HIDE_DRAFT_API
604#endif // U_SHOW_CPLUSPLUS_API
605#endif // __LOCALEMATCHER_H__
606