1 | // © 2019 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html#License |
3 | |
4 | // locdistance.h |
5 | // created: 2019may08 Markus W. Scherer |
6 | |
7 | #ifndef __LOCDISTANCE_H__ |
8 | #define __LOCDISTANCE_H__ |
9 | |
10 | #include "unicode/utypes.h" |
11 | #include "unicode/bytestrie.h" |
12 | #include "unicode/localematcher.h" |
13 | #include "unicode/locid.h" |
14 | #include "unicode/uobject.h" |
15 | #include "lsr.h" |
16 | |
17 | U_NAMESPACE_BEGIN |
18 | |
19 | struct LocaleDistanceData; |
20 | |
21 | /** |
22 | * Offline-built data for LocaleMatcher. |
23 | * Mostly but not only the data for mapping locales to their maximized forms. |
24 | */ |
25 | class LocaleDistance final : public UMemory { |
26 | public: |
27 | static const LocaleDistance *getSingleton(UErrorCode &errorCode); |
28 | |
29 | /** |
30 | * Finds the supported LSR with the smallest distance from the desired one. |
31 | * Equivalent LSR subtags must be normalized into a canonical form. |
32 | * |
33 | * <p>Returns the index of the lowest-distance supported LSR in bits 31..8 |
34 | * (negative if none has a distance below the threshold), |
35 | * and its distance (0..ABOVE_THRESHOLD) in bits 7..0. |
36 | */ |
37 | int32_t getBestIndexAndDistance(const LSR &desired, |
38 | const LSR **supportedLSRs, int32_t supportedLSRsLength, |
39 | int32_t threshold, ULocMatchFavorSubtag favorSubtag) const; |
40 | |
41 | int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; } |
42 | |
43 | UBool isParadigmLSR(const LSR &lsr) const; |
44 | |
45 | int32_t getDefaultScriptDistance() const { |
46 | return defaultScriptDistance; |
47 | } |
48 | |
49 | int32_t getDefaultDemotionPerDesiredLocale() const { |
50 | return defaultDemotionPerDesiredLocale; |
51 | } |
52 | |
53 | private: |
54 | LocaleDistance(const LocaleDistanceData &data); |
55 | LocaleDistance(const LocaleDistance &other) = delete; |
56 | LocaleDistance &operator=(const LocaleDistance &other) = delete; |
57 | |
58 | static void initLocaleDistance(UErrorCode &errorCode); |
59 | |
60 | static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState, |
61 | const char *desired, const char *supported); |
62 | |
63 | static int32_t getRegionPartitionsDistance( |
64 | BytesTrie &iter, uint64_t startState, |
65 | const char *desiredPartitions, const char *supportedPartitions, |
66 | int32_t threshold); |
67 | |
68 | static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState); |
69 | |
70 | static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue); |
71 | |
72 | const char *partitionsForRegion(const LSR &lsr) const { |
73 | // ill-formed region -> one non-matching string |
74 | int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex]; |
75 | return partitionArrays[pIndex]; |
76 | } |
77 | |
78 | int32_t getDefaultRegionDistance() const { |
79 | return defaultRegionDistance; |
80 | } |
81 | |
82 | // The trie maps each dlang+slang+dscript+sscript+dregion+sregion |
83 | // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance. |
84 | // There is also a trie value for each subsequence of whole subtags. |
85 | // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"". |
86 | BytesTrie trie; |
87 | |
88 | /** |
89 | * Maps each region to zero or more single-character partitions. |
90 | */ |
91 | const uint8_t *regionToPartitionsIndex; |
92 | const char **partitionArrays; |
93 | |
94 | /** |
95 | * Used to get the paradigm region for a cluster, if there is one. |
96 | */ |
97 | const LSR *paradigmLSRs; |
98 | int32_t paradigmLSRsLength; |
99 | |
100 | int32_t defaultLanguageDistance; |
101 | int32_t defaultScriptDistance; |
102 | int32_t defaultRegionDistance; |
103 | int32_t minRegionDistance; |
104 | int32_t defaultDemotionPerDesiredLocale; |
105 | }; |
106 | |
107 | U_NAMESPACE_END |
108 | |
109 | #endif // __LOCDISTANCE_H__ |
110 | |