1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * Copyright (C) 2013-2016, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* |
8 | * collationfastlatinbuilder.h |
9 | * |
10 | * created on: 2013aug09 |
11 | * created by: Markus W. Scherer |
12 | */ |
13 | |
14 | #ifndef __COLLATIONFASTLATINBUILDER_H__ |
15 | #define __COLLATIONFASTLATINBUILDER_H__ |
16 | |
17 | #include "unicode/utypes.h" |
18 | |
19 | #if !UCONFIG_NO_COLLATION |
20 | |
21 | #include "unicode/ucol.h" |
22 | #include "unicode/unistr.h" |
23 | #include "unicode/uobject.h" |
24 | #include "collation.h" |
25 | #include "collationfastlatin.h" |
26 | #include "uvectr64.h" |
27 | |
28 | U_NAMESPACE_BEGIN |
29 | |
30 | struct CollationData; |
31 | |
32 | class U_I18N_API CollationFastLatinBuilder : public UObject { |
33 | public: |
34 | CollationFastLatinBuilder(UErrorCode &errorCode); |
35 | ~CollationFastLatinBuilder(); |
36 | |
37 | UBool forData(const CollationData &data, UErrorCode &errorCode); |
38 | |
39 | const uint16_t *getTable() const { |
40 | return reinterpret_cast<const uint16_t *>(result.getBuffer()); |
41 | } |
42 | int32_t lengthOfTable() const { return result.length(); } |
43 | |
44 | private: |
45 | // space, punct, symbol, currency (not digit) |
46 | enum { NUM_SPECIAL_GROUPS = UCOL_REORDER_CODE_CURRENCY - UCOL_REORDER_CODE_FIRST + 1 }; |
47 | |
48 | UBool loadGroups(const CollationData &data, UErrorCode &errorCode); |
49 | UBool inSameGroup(uint32_t p, uint32_t q) const; |
50 | |
51 | void resetCEs(); |
52 | void getCEs(const CollationData &data, UErrorCode &errorCode); |
53 | UBool getCEsFromCE32(const CollationData &data, UChar32 c, uint32_t ce32, |
54 | UErrorCode &errorCode); |
55 | UBool getCEsFromContractionCE32(const CollationData &data, uint32_t ce32, |
56 | UErrorCode &errorCode); |
57 | void addContractionEntry(int32_t x, int64_t cce0, int64_t cce1, UErrorCode &errorCode); |
58 | void addUniqueCE(int64_t ce, UErrorCode &errorCode); |
59 | uint32_t getMiniCE(int64_t ce) const; |
60 | UBool encodeUniqueCEs(UErrorCode &errorCode); |
61 | UBool encodeCharCEs(UErrorCode &errorCode); |
62 | UBool encodeContractions(UErrorCode &errorCode); |
63 | uint32_t encodeTwoCEs(int64_t first, int64_t second) const; |
64 | |
65 | static UBool isContractionCharCE(int64_t ce) { |
66 | return (uint32_t)(ce >> 32) == Collation::NO_CE_PRIMARY && ce != Collation::NO_CE; |
67 | } |
68 | |
69 | static const uint32_t CONTRACTION_FLAG = 0x80000000; |
70 | |
71 | // temporary "buffer" |
72 | int64_t ce0, ce1; |
73 | |
74 | int64_t charCEs[CollationFastLatin::NUM_FAST_CHARS][2]; |
75 | |
76 | UVector64 contractionCEs; |
77 | UVector64 uniqueCEs; |
78 | |
79 | /** One 16-bit mini CE per unique CE. */ |
80 | uint16_t *miniCEs; |
81 | |
82 | // These are constant for a given root collator. |
83 | uint32_t lastSpecialPrimaries[NUM_SPECIAL_GROUPS]; |
84 | uint32_t firstDigitPrimary; |
85 | uint32_t firstLatinPrimary; |
86 | uint32_t lastLatinPrimary; |
87 | // This determines the first normal primary weight which is mapped to |
88 | // a short mini primary. It must be >=firstDigitPrimary. |
89 | uint32_t firstShortPrimary; |
90 | |
91 | UBool shortPrimaryOverflow; |
92 | |
93 | UnicodeString result; |
94 | int32_t ; |
95 | }; |
96 | |
97 | U_NAMESPACE_END |
98 | |
99 | #endif // !UCONFIG_NO_COLLATION |
100 | #endif // __COLLATIONFASTLATINBUILDER_H__ |
101 | |