| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ****************************************************************************** | 
|---|
| 5 | * | 
|---|
| 6 | *   Copyright (C) 2001-2008, International Business Machines | 
|---|
| 7 | *   Corporation and others.  All Rights Reserved. | 
|---|
| 8 | * | 
|---|
| 9 | ****************************************************************************** | 
|---|
| 10 | *   file name:  utrie2_impl.h | 
|---|
| 11 | *   encoding:   UTF-8 | 
|---|
| 12 | *   tab size:   8 (not used) | 
|---|
| 13 | *   indentation:4 | 
|---|
| 14 | * | 
|---|
| 15 | *   created on: 2008sep26 (split off from utrie2.c) | 
|---|
| 16 | *   created by: Markus W. Scherer | 
|---|
| 17 | * | 
|---|
| 18 | *   Definitions needed for both runtime and builder code for UTrie2, | 
|---|
| 19 | *   used by utrie2.c and utrie2_builder.c. | 
|---|
| 20 | */ | 
|---|
| 21 |  | 
|---|
| 22 | #ifndef __UTRIE2_IMPL_H__ | 
|---|
| 23 | #define __UTRIE2_IMPL_H__ | 
|---|
| 24 |  | 
|---|
| 25 | #ifdef UCPTRIE_DEBUG | 
|---|
| 26 | #include "unicode/umutablecptrie.h" | 
|---|
| 27 | #endif | 
|---|
| 28 | #include "utrie2.h" | 
|---|
| 29 |  | 
|---|
| 30 | /* Public UTrie2 API implementation ----------------------------------------- */ | 
|---|
| 31 |  | 
|---|
| 32 | /* | 
|---|
| 33 | * These definitions are mostly needed by utrie2.cpp, | 
|---|
| 34 | * but also by utrie2_serialize() and utrie2_swap(). | 
|---|
| 35 | */ | 
|---|
| 36 |  | 
|---|
| 37 | // UTrie2 signature values, in platform endianness and opposite endianness. | 
|---|
| 38 | // The UTrie2 signature ASCII byte values spell "Tri2". | 
|---|
| 39 | #define UTRIE2_SIG      0x54726932 | 
|---|
| 40 | #define UTRIE2_OE_SIG   0x32697254 | 
|---|
| 41 |  | 
|---|
| 42 | /** | 
|---|
| 43 | * Trie data structure in serialized form: | 
|---|
| 44 | * | 
|---|
| 45 | * UTrie2Header header; | 
|---|
| 46 | * uint16_t index[header.index2Length]; | 
|---|
| 47 | * uint16_t data[header.shiftedDataLength<<2];  -- or uint32_t data[...] | 
|---|
| 48 | * @internal | 
|---|
| 49 | */ | 
|---|
| 50 | typedef struct  { | 
|---|
| 51 | /** "Tri2" in big-endian US-ASCII (0x54726932) */ | 
|---|
| 52 | uint32_t ; | 
|---|
| 53 |  | 
|---|
| 54 | /** | 
|---|
| 55 | * options bit field: | 
|---|
| 56 | * 15.. 4   reserved (0) | 
|---|
| 57 | *  3.. 0   UTrie2ValueBits valueBits | 
|---|
| 58 | */ | 
|---|
| 59 | uint16_t ; | 
|---|
| 60 |  | 
|---|
| 61 | /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */ | 
|---|
| 62 | uint16_t ; | 
|---|
| 63 |  | 
|---|
| 64 | /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */ | 
|---|
| 65 | uint16_t ; | 
|---|
| 66 |  | 
|---|
| 67 | /** Null index and data blocks, not shifted. */ | 
|---|
| 68 | uint16_t , ; | 
|---|
| 69 |  | 
|---|
| 70 | /** | 
|---|
| 71 | * First code point of the single-value range ending with U+10ffff, | 
|---|
| 72 | * rounded up and then shifted right by UTRIE2_SHIFT_1. | 
|---|
| 73 | */ | 
|---|
| 74 | uint16_t ; | 
|---|
| 75 | } ; | 
|---|
| 76 |  | 
|---|
| 77 | /** | 
|---|
| 78 | * Constants for use with UTrie2Header.options. | 
|---|
| 79 | * @internal | 
|---|
| 80 | */ | 
|---|
| 81 | enum { | 
|---|
| 82 | /** Mask to get the UTrie2ValueBits valueBits from options. */ | 
|---|
| 83 | UTRIE2_OPTIONS_VALUE_BITS_MASK=0xf | 
|---|
| 84 | }; | 
|---|
| 85 |  | 
|---|
| 86 | /* Building a trie ---------------------------------------------------------- */ | 
|---|
| 87 |  | 
|---|
| 88 | /* | 
|---|
| 89 | * These definitions are mostly needed by utrie2_builder.c, but also by | 
|---|
| 90 | * utrie2_get32() and utrie2_enum(). | 
|---|
| 91 | */ | 
|---|
| 92 |  | 
|---|
| 93 | enum { | 
|---|
| 94 | /** | 
|---|
| 95 | * At build time, leave a gap in the index-2 table, | 
|---|
| 96 | * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table | 
|---|
| 97 | * and the supplementary index-1 table. | 
|---|
| 98 | * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting. | 
|---|
| 99 | */ | 
|---|
| 100 | UNEWTRIE2_INDEX_GAP_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH, | 
|---|
| 101 | UNEWTRIE2_INDEX_GAP_LENGTH= | 
|---|
| 102 | ((UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH)+UTRIE2_INDEX_2_MASK)& | 
|---|
| 103 | ~UTRIE2_INDEX_2_MASK, | 
|---|
| 104 |  | 
|---|
| 105 | /** | 
|---|
| 106 | * Maximum length of the build-time index-2 array. | 
|---|
| 107 | * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2, | 
|---|
| 108 | * plus the part of the index-2 table for lead surrogate code points, | 
|---|
| 109 | * plus the build-time index gap, | 
|---|
| 110 | * plus the null index-2 block. | 
|---|
| 111 | */ | 
|---|
| 112 | UNEWTRIE2_MAX_INDEX_2_LENGTH= | 
|---|
| 113 | (0x110000>>UTRIE2_SHIFT_2)+ | 
|---|
| 114 | UTRIE2_LSCP_INDEX_2_LENGTH+ | 
|---|
| 115 | UNEWTRIE2_INDEX_GAP_LENGTH+ | 
|---|
| 116 | UTRIE2_INDEX_2_BLOCK_LENGTH, | 
|---|
| 117 |  | 
|---|
| 118 | UNEWTRIE2_INDEX_1_LENGTH=0x110000>>UTRIE2_SHIFT_1 | 
|---|
| 119 | }; | 
|---|
| 120 |  | 
|---|
| 121 | /** | 
|---|
| 122 | * Maximum length of the build-time data array. | 
|---|
| 123 | * One entry per 0x110000 code points, plus the illegal-UTF-8 block and the null block, | 
|---|
| 124 | * plus values for the 0x400 surrogate code units. | 
|---|
| 125 | */ | 
|---|
| 126 | #define UNEWTRIE2_MAX_DATA_LENGTH (0x110000+0x40+0x40+0x400) | 
|---|
| 127 |  | 
|---|
| 128 | /* | 
|---|
| 129 | * Build-time trie structure. | 
|---|
| 130 | * | 
|---|
| 131 | * Just using a boolean flag for "repeat use" could lead to data array overflow | 
|---|
| 132 | * because we would not be able to detect when a data block becomes unused. | 
|---|
| 133 | * It also leads to orphan data blocks that are kept through serialization. | 
|---|
| 134 | * | 
|---|
| 135 | * Need to use reference counting for data blocks, | 
|---|
| 136 | * and allocDataBlock() needs to look for a free block before increasing dataLength. | 
|---|
| 137 | * | 
|---|
| 138 | * This scheme seems like overkill for index-2 blocks since the whole index array is | 
|---|
| 139 | * preallocated anyway (unlike the growable data array). | 
|---|
| 140 | * Just allocating multiple index-2 blocks as needed. | 
|---|
| 141 | */ | 
|---|
| 142 | struct UNewTrie2 { | 
|---|
| 143 | int32_t index1[UNEWTRIE2_INDEX_1_LENGTH]; | 
|---|
| 144 | int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH]; | 
|---|
| 145 | uint32_t *data; | 
|---|
| 146 | #ifdef UCPTRIE_DEBUG | 
|---|
| 147 | UMutableCPTrie *t3; | 
|---|
| 148 | #endif | 
|---|
| 149 |  | 
|---|
| 150 | uint32_t initialValue, errorValue; | 
|---|
| 151 | int32_t index2Length, dataCapacity, dataLength; | 
|---|
| 152 | int32_t firstFreeBlock; | 
|---|
| 153 | int32_t index2NullOffset, dataNullOffset; | 
|---|
| 154 | UChar32 highStart; | 
|---|
| 155 | UBool isCompacted; | 
|---|
| 156 |  | 
|---|
| 157 | /** | 
|---|
| 158 | * Multi-purpose per-data-block table. | 
|---|
| 159 | * | 
|---|
| 160 | * Before compacting: | 
|---|
| 161 | * | 
|---|
| 162 | * Per-data-block reference counters/free-block list. | 
|---|
| 163 | *  0: unused | 
|---|
| 164 | * >0: reference counter (number of index-2 entries pointing here) | 
|---|
| 165 | * <0: next free data block in free-block list | 
|---|
| 166 | * | 
|---|
| 167 | * While compacting: | 
|---|
| 168 | * | 
|---|
| 169 | * Map of adjusted indexes, used in compactData() and compactIndex2(). | 
|---|
| 170 | * Maps from original indexes to new ones. | 
|---|
| 171 | */ | 
|---|
| 172 | int32_t map[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2]; | 
|---|
| 173 | }; | 
|---|
| 174 |  | 
|---|
| 175 | #endif | 
|---|
| 176 |  | 
|---|