// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// loclikelysubtags.h
// created: 2019may08 Markus W. Scherer
6
7#ifndef __LOCLIKELYSUBTAGS_H__
8#define __LOCLIKELYSUBTAGS_H__
9
10#include <utility>
11#include "unicode/utypes.h"
12#include "unicode/bytestrie.h"
13#include "unicode/locid.h"
14#include "unicode/uobject.h"
15#include "unicode/ures.h"
16#include "charstrmap.h"
17#include "lsr.h"
18
19U_NAMESPACE_BEGIN
20
21struct XLikelySubtagsData;
22
23struct LocaleDistanceData {
24 LocaleDistanceData() = default;
25 LocaleDistanceData(LocaleDistanceData &&data);
26 ~LocaleDistanceData();
27
28 const uint8_t *distanceTrieBytes = nullptr;
29 const uint8_t *regionToPartitions = nullptr;
30 const char **partitions = nullptr;
31 const LSR *paradigms = nullptr;
32 int32_t paradigmsLength = 0;
33 const int32_t *distances = nullptr;
34
35private:
36 LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
37};
38
39// TODO(ICU-20777): Rename to just LikelySubtags.
40class XLikelySubtags final : public UMemory {
41public:
42 ~XLikelySubtags();
43
44 static constexpr int32_t SKIP_SCRIPT = 1;
45
46 // VisibleForTesting
47 static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
48
49 // VisibleForTesting
50 LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
51
52 /**
53 * Tests whether lsr is "more likely" than other.
54 * For example, fr-Latn-FR is more likely than fr-Latn-CH because
55 * FR is the default region for fr-Latn.
56 *
57 * The likelyInfo caches lookup information between calls.
58 * The return value is an updated likelyInfo value,
59 * with bit 0 set if lsr is "more likely".
60 * The initial value of likelyInfo must be negative.
61 */
62 int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const;
63
64 // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
65 // in loclikely.cpp to this new code, including activating this
66 // minimizeSubtags() function. The LocaleMatcher does not minimize.
67#if 0
68 LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
69 ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
70#endif
71
72 // visible for LocaleDistance
73 const LocaleDistanceData &getDistanceData() const { return distanceData; }
74
75private:
76 XLikelySubtags(XLikelySubtagsData &data);
77 XLikelySubtags(const XLikelySubtags &other) = delete;
78 XLikelySubtags &operator=(const XLikelySubtags &other) = delete;
79
80 static void initLikelySubtags(UErrorCode &errorCode);
81
82 LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
83 const char *variant, UErrorCode &errorCode) const;
84
85 /**
86 * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
87 */
88 LSR maximize(const char *language, const char *script, const char *region) const;
89
90 int32_t getLikelyIndex(const char *language, const char *script) const;
91
92 static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
93
94 UResourceBundle *langInfoBundle;
95 // We could store the strings by value, except that if there were few enough strings,
96 // moving the contents could copy it to a different array,
97 // invalidating the pointers stored in the maps.
98 CharString *strings;
99 CharStringMap languageAliases;
100 CharStringMap regionAliases;
101
102 // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
103 // There is also a trie value for each intermediate lang and lang+script.
104 // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
105 BytesTrie trie;
106 uint64_t trieUndState;
107 uint64_t trieUndZzzzState;
108 int32_t defaultLsrIndex;
109 uint64_t trieFirstLetterStates[26];
110 const LSR *lsrs;
111#if U_DEBUG
112 int32_t lsrsLength;
113#endif
114
115 // distance/matcher data: see comment in XLikelySubtagsData::load()
116 LocaleDistanceData distanceData;
117};
118
119U_NAMESPACE_END
120
121#endif // __LOCLIKELYSUBTAGS_H__
122