1// © 2019 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3
4// locdistance.h
5// created: 2019may08 Markus W. Scherer
6
7#ifndef __LOCDISTANCE_H__
8#define __LOCDISTANCE_H__
9
10#include "unicode/utypes.h"
11#include "unicode/bytestrie.h"
12#include "unicode/localematcher.h"
13#include "unicode/locid.h"
14#include "unicode/uobject.h"
15#include "lsr.h"
16
17U_NAMESPACE_BEGIN
18
19struct LocaleDistanceData;
20
21/**
22 * Offline-built data for LocaleMatcher.
23 * Mostly but not only the data for mapping locales to their maximized forms.
24 */
25class LocaleDistance final : public UMemory {
26public:
27 static const LocaleDistance *getSingleton(UErrorCode &errorCode);
28
29 /**
30 * Finds the supported LSR with the smallest distance from the desired one.
31 * Equivalent LSR subtags must be normalized into a canonical form.
32 *
33 * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
34 * (negative if none has a distance below the threshold),
35 * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
36 */
37 int32_t getBestIndexAndDistance(const LSR &desired,
38 const LSR **supportedLSRs, int32_t supportedLSRsLength,
39 int32_t threshold, ULocMatchFavorSubtag favorSubtag) const;
40
41 int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; }
42
43 UBool isParadigmLSR(const LSR &lsr) const;
44
45 int32_t getDefaultScriptDistance() const {
46 return defaultScriptDistance;
47 }
48
49 int32_t getDefaultDemotionPerDesiredLocale() const {
50 return defaultDemotionPerDesiredLocale;
51 }
52
53private:
54 LocaleDistance(const LocaleDistanceData &data);
55 LocaleDistance(const LocaleDistance &other) = delete;
56 LocaleDistance &operator=(const LocaleDistance &other) = delete;
57
58 static void initLocaleDistance(UErrorCode &errorCode);
59
60 static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
61 const char *desired, const char *supported);
62
63 static int32_t getRegionPartitionsDistance(
64 BytesTrie &iter, uint64_t startState,
65 const char *desiredPartitions, const char *supportedPartitions,
66 int32_t threshold);
67
68 static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
69
70 static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
71
72 const char *partitionsForRegion(const LSR &lsr) const {
73 // ill-formed region -> one non-matching string
74 int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
75 return partitionArrays[pIndex];
76 }
77
78 int32_t getDefaultRegionDistance() const {
79 return defaultRegionDistance;
80 }
81
82 // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
83 // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
84 // There is also a trie value for each subsequence of whole subtags.
85 // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
86 BytesTrie trie;
87
88 /**
89 * Maps each region to zero or more single-character partitions.
90 */
91 const uint8_t *regionToPartitionsIndex;
92 const char **partitionArrays;
93
94 /**
95 * Used to get the paradigm region for a cluster, if there is one.
96 */
97 const LSR *paradigmLSRs;
98 int32_t paradigmLSRsLength;
99
100 int32_t defaultLanguageDistance;
101 int32_t defaultScriptDistance;
102 int32_t defaultRegionDistance;
103 int32_t minRegionDistance;
104 int32_t defaultDemotionPerDesiredLocale;
105};
106
107U_NAMESPACE_END
108
109#endif // __LOCDISTANCE_H__
110