| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | // |
| 5 | |
| 6 | // |
| 7 | |
| 8 | // |
| 9 | // Provides a mechanism to store an array of DWORD-typed fields in a space-efficient manner. There are some |
| 10 | // caveats: |
| 11 | // 1) Fields can be written and read in an uncompressed form (a simple array of DWORD values) until the |
| 12 | // PackFields() method is invoked. Once this method has been invoked (and returned true) fields have been |
| 13 | // compacted and must not be modified again. That is, the primary usage of this class is to store a set of |
| 14 | // initialized-once fields. |
| 15 | // 2) The compaction algorithm relies on the fields containing small values (such as counts). Avoid storing |
| 16 | // fields that have special sentinel values (such as all bits set) which will frequently set high order |
| 17 | // bits. |
| 18 | // 3) An instance of PackedDWORDFields will take up a fixed quantity of memory equivalent to an array of |
| 19 | // DWORD fields. If PackFields() returns true then the fields values frozen at the time of the call have |
| 20 | // been compressed into a fewer number of bytes in-place. This smaller size will always be a multiple of |
| 21 | // sizeof(DWORD) in length and is reported by GetPackedSize(). If a PackedDWORDFields structure is being |
| 22 | // declared as a field inside another structure it is typically wise to place the field last to take |
| 23 | // advantage of this size reduction (e.g. when saving the outer structure into an ngen image). If |
| 24 | // PackFields() returns false then the fields remain unpacked and unchanged. |
| 25 | // 4) The caller retains the responsibility of recording whether an instance of PackedDWORDFields is in the |
| 26 | // packed or unpacked state. This is important since incorrect behavior will result if the wrong methods |
| 27 | // are used for the current state (e.g. calling GetUnpackedField() on a packed instance). This is not done |
| 28 | // automatically since there are no bits free to store the state. However, under a debug build correct |
| 29 | // usage will be checked (at the expensive of extra storage space). |
| 30 | // 5) The space saving made come at a runtime CPU cost to access the fields. Do not use this mechanism to |
| 31 | // compact fields that must be read on a perfomance critical path. If unsure, measure the performance of |
| 32 | // this solution before committing to it. |
| 33 | // |
| 34 | // ============================================================================ |
| 35 | |
| 36 | // Describe an array of FIELD_COUNT DWORDs. Each entry is addressed via a zero-based index and is expected to |
| 37 | // frequently contain a small integer and remain frozen after initialization. |
| 38 | template <DWORD FIELD_COUNT> |
| 39 | class PackedDWORDFields |
| 40 | { |
| 41 | // Some constants to make the code a little more readable. |
| 42 | enum Constants |
| 43 | { |
| 44 | kMaxLengthBits = 5, // Number of bits needed to express the maximum length of a field (32-bits) |
| 45 | kBitsPerDWORD = 32, // Number of bits in a DWORD |
| 46 | }; |
| 47 | |
| 48 | public: |
| 49 | // Fields are all initialized to zero. |
| 50 | PackedDWORDFields() |
| 51 | { |
| 52 | LIMITED_METHOD_DAC_CONTRACT; |
| 53 | |
| 54 | memset(m_rgUnpackedFields, 0, sizeof(m_rgUnpackedFields)); |
| 55 | #ifdef _DEBUG |
| 56 | memset(m_rgDebugUnpackedFields, 0, sizeof(m_rgDebugUnpackedFields)); |
| 57 | m_fFieldsPacked = false; |
| 58 | #endif // _DEBUG |
| 59 | } |
| 60 | |
| 61 | // Get the value of the given field when the structure is in its unpacked state. |
| 62 | DWORD GetUnpackedField(DWORD dwFieldIndex) |
| 63 | { |
| 64 | LIMITED_METHOD_DAC_CONTRACT; |
| 65 | |
| 66 | _ASSERTE(dwFieldIndex < FIELD_COUNT); |
| 67 | _ASSERTE(!m_fFieldsPacked); |
| 68 | |
| 69 | return m_rgUnpackedFields[dwFieldIndex]; |
| 70 | } |
| 71 | |
| 72 | // Set the value of the given field when the structure is in its unpacked state. Setting field values |
| 73 | // multiple times is allowed but only until a successful call to PackFields is made. |
| 74 | void SetUnpackedField(DWORD dwFieldIndex, DWORD dwValue) |
| 75 | { |
| 76 | LIMITED_METHOD_CONTRACT; |
| 77 | |
| 78 | _ASSERTE(dwFieldIndex < FIELD_COUNT); |
| 79 | _ASSERTE(!m_fFieldsPacked); |
| 80 | |
| 81 | m_rgUnpackedFields[dwFieldIndex] = dwValue; |
| 82 | |
| 83 | #ifdef _DEBUG |
| 84 | m_rgDebugUnpackedFields[dwFieldIndex] = dwValue; |
| 85 | #endif // _DEBUG |
| 86 | } |
| 87 | |
| 88 | // Attempt to optimize the set of fields given their current values. Returns false if compaction wouldn't |
| 89 | // achieve any space savings (in this case the structure remains in the unpacked state and the caller can |
| 90 | // continue to use the *UnpackedField methods above or even re-attempt PackFields() with different field |
| 91 | // values). If true is returned the data has been compacted into a smaller amount of space (this will |
| 92 | // always be a multiple of sizeof(DWORD) in size). This size can be queried using GetPackedSize() below. |
| 93 | // Once PackFields() has returned true fields can no longer be modified and field values must be retrieved |
| 94 | // via GetPackedField() rather than GetUnpackedField(). |
| 95 | bool PackFields() |
| 96 | { |
| 97 | CONTRACTL |
| 98 | { |
| 99 | NOTHROW; |
| 100 | GC_NOTRIGGER; |
| 101 | MODE_ANY; |
| 102 | SO_INTOLERANT; |
| 103 | } |
| 104 | CONTRACTL_END; |
| 105 | |
| 106 | // Can't re-pack a packed structure. |
| 107 | _ASSERTE(!m_fFieldsPacked); |
| 108 | |
| 109 | // First compute the number of bits of space we'd need for a packed representation. Do this before |
| 110 | // making any changes since sometimes we'd end up expanding the data instead and in this case we wish |
| 111 | // to return false and make no updates to the structure. |
| 112 | |
| 113 | // There's a fixed overhead of kMaxLengthBits for each field (we store the packed fields as a |
| 114 | // bit-stream that alternates between a field length (of kMaxLengthBits) followed by a variable length |
| 115 | // bitfield containing the field value. |
| 116 | DWORD dwTotalPackedBits = FIELD_COUNT * kMaxLengthBits; |
| 117 | |
| 118 | // For each field calculate excatly how many bits we'd need to store the field value and add this to |
| 119 | // the total. |
| 120 | for (DWORD i = 0; i < FIELD_COUNT; i++) |
| 121 | dwTotalPackedBits += BitsRequired(m_rgUnpackedFields[i]); |
| 122 | |
| 123 | // Now we have the total is it smaller than a simple array of DWORDs? |
| 124 | if (dwTotalPackedBits >= (FIELD_COUNT * kBitsPerDWORD)) |
| 125 | return false; |
| 126 | |
| 127 | // Compaction will save us space. We're committed to implementing that compaction now. |
| 128 | |
| 129 | // Work from a copy of the unpacked fields since we're about to start modifying the space in which |
| 130 | // they're currently stored. |
| 131 | DWORD rgUnpackedFields[FIELD_COUNT]; |
| 132 | memcpy(rgUnpackedFields, m_rgUnpackedFields, sizeof(rgUnpackedFields)); |
| 133 | |
| 134 | // Start writing a stream of bits. For each field write a fixed sized header describing the number of |
| 135 | // bits required to express the field followed by the field value itself. |
| 136 | DWORD dwOffset = 0; |
| 137 | for (DWORD i = 0; i < FIELD_COUNT; i++) |
| 138 | { |
| 139 | // Find the minimal number of bits required to encode the current field's value. |
| 140 | DWORD dwFieldLength = BitsRequired(rgUnpackedFields[i]); |
| 141 | _ASSERTE(dwFieldLength > 0 && dwFieldLength <= kBitsPerDWORD); |
| 142 | |
| 143 | // Write the size field. Note that we store the size biased by one. That is, a field length of one |
| 144 | // is encoded as zero. We do this so we can express a range of field sizes from 1 through 32, |
| 145 | // emcompassing the worst case scenario (a full 32 bits). It comes at the cost of not being able |
| 146 | // to encode zero-valued fields with zero bits. Is this is deemed an important optimization in the |
| 147 | // future we could always given up on a simple linear mapping of the size field and use a lookup |
| 148 | // table to map values encoded into the real sizes. Experiments with EEClass packed fields over |
| 149 | // mscorlib show that this currently doesn't yield us much benefit, primarily due to the DWORD |
| 150 | // round-up size semantic, which implies we'd need a lot more optimization than this to reduce the |
| 151 | // average structure size below the next DWORD threshhold. |
| 152 | BitVectorSet(dwOffset, kMaxLengthBits, dwFieldLength - 1); |
| 153 | dwOffset += kMaxLengthBits; |
| 154 | |
| 155 | // Write the field value itself. |
| 156 | BitVectorSet(dwOffset, dwFieldLength, rgUnpackedFields[i]); |
| 157 | dwOffset += dwFieldLength; |
| 158 | } |
| 159 | |
| 160 | #ifdef _DEBUG |
| 161 | m_fFieldsPacked = true; |
| 162 | #endif // _DEBUG |
| 163 | |
| 164 | // Compaction was successful. |
| 165 | return true; |
| 166 | } |
| 167 | |
| 168 | // Return the size in bytes of a compacted structure (it is illegal to call this on an uncompacted |
| 169 | // structure). |
| 170 | DWORD GetPackedSize() |
| 171 | { |
| 172 | LIMITED_METHOD_DAC_CONTRACT; |
| 173 | |
| 174 | _ASSERTE(m_fFieldsPacked); |
| 175 | |
| 176 | // Walk the field stream reading header (which are fixed size) and then using the value of the headers |
| 177 | // to skip the field value. |
| 178 | DWORD cBits = 0; |
| 179 | for (DWORD i = 0; i < FIELD_COUNT; i++) |
| 180 | cBits += kMaxLengthBits + BitVectorGet(cBits, kMaxLengthBits) + 1; // +1 since size is [1,32] not [0,31] |
| 181 | |
| 182 | // Compute the number of DWORDs needed to store the bits of the encoding. |
| 183 | // static_cast would not be necessary if ALIGN_UP were templated like FitsIn. |
| 184 | DWORD cDWORDs = static_cast<DWORD>(ALIGN_UP(cBits, kBitsPerDWORD)) / kBitsPerDWORD; |
| 185 | |
| 186 | // Return the total structure size. |
| 187 | return offsetof(PackedDWORDFields<FIELD_COUNT>, m_rgPackedFields) + (cDWORDs * sizeof(DWORD)); |
| 188 | } |
| 189 | |
| 190 | // Get the value of a packed field. Illegal to call this on an uncompacted structure. |
| 191 | DWORD GetPackedField(DWORD dwFieldIndex) |
| 192 | { |
| 193 | CONTRACTL |
| 194 | { |
| 195 | NOTHROW; |
| 196 | GC_NOTRIGGER; |
| 197 | MODE_ANY; |
| 198 | SUPPORTS_DAC; |
| 199 | SO_TOLERANT; |
| 200 | } |
| 201 | CONTRACTL_END; |
| 202 | |
| 203 | _ASSERTE(dwFieldIndex < FIELD_COUNT); |
| 204 | _ASSERTE(m_fFieldsPacked); |
| 205 | |
| 206 | // Walk past all the predecessor fields. |
| 207 | DWORD dwOffset = 0; |
| 208 | for (DWORD i = 0; i < dwFieldIndex; i++) |
| 209 | dwOffset += kMaxLengthBits + BitVectorGet(dwOffset, kMaxLengthBits) + 1; // +1 since size is [1,32] not [0,31] |
| 210 | |
| 211 | // The next kMaxLengthBits bits contain the length in bits of the field we want (-1 due to the way we |
| 212 | // encode the length). |
| 213 | DWORD dwFieldLength = BitVectorGet(dwOffset, kMaxLengthBits) + 1; |
| 214 | dwOffset += kMaxLengthBits; |
| 215 | |
| 216 | // Grab the field value. |
| 217 | DWORD dwReturn = BitVectorGet(dwOffset, dwFieldLength); |
| 218 | |
| 219 | // On debug builds ensure the encoded field value is the same as the original unpacked version. |
| 220 | _ASSERTE(dwReturn == m_rgDebugUnpackedFields[dwFieldIndex]); |
| 221 | return dwReturn; |
| 222 | } |
| 223 | |
| 224 | private: |
| 225 | // Return the minimum number of bits required to encode a DWORD value by stripping out the |
| 226 | // most-significant leading zero bits). Returns a value between 1 and 32 inclusive (we never encode |
| 227 | // anything with zero bits). |
| 228 | DWORD BitsRequired(DWORD dwValue) |
| 229 | { |
| 230 | LIMITED_METHOD_CONTRACT; |
| 231 | |
| 232 | // Starting with a bit-mask of the most significant bit and iterating over masks for successively less |
| 233 | // significant bits, stop as soon as the mask co-incides with a set bit in the value. Simultaneously |
| 234 | // we're counting down the bits required to express the range of values implied by seeing the |
| 235 | // corresponding bit set in the value (e.g. when we're testing the high bit we know we'd need 32-bits |
| 236 | // to encode the range of values that have this bit set). Stop when we get to one bit (we never return |
| 237 | // 0 bits required, even for an input value of 0). |
| 238 | DWORD dwMask = 0x80000000; |
| 239 | DWORD cBits = 32; |
| 240 | while (cBits > 1) |
| 241 | { |
| 242 | if (dwValue & dwMask) |
| 243 | return cBits; |
| 244 | |
| 245 | dwMask >>= 1; |
| 246 | cBits--; |
| 247 | } |
| 248 | |
| 249 | return 1; |
| 250 | } |
| 251 | |
| 252 | // Set the dwLength bits at m_rgPackedFields + dwOffset bits to the value dwValue. |
| 253 | void BitVectorSet(DWORD dwOffset, DWORD dwLength, DWORD dwValue) |
| 254 | { |
| 255 | LIMITED_METHOD_CONTRACT; |
| 256 | |
| 257 | _ASSERTE(dwLength > 0 && dwLength <= kBitsPerDWORD); // Can set at most one DWORD at a time |
| 258 | _ASSERTE((dwLength == kBitsPerDWORD) || (dwValue < (1U << dwLength))); // Value had better fit in the given length |
| 259 | |
| 260 | // Calculate the start and end naturally aligned DWORDs into which the value will go. |
| 261 | DWORD dwStartBlock = dwOffset / kBitsPerDWORD; |
| 262 | DWORD dwEndBlock = (dwOffset + dwLength - 1) / kBitsPerDWORD; |
| 263 | if (dwStartBlock == dwEndBlock) |
| 264 | { |
| 265 | // Easy case: the new value fits entirely within one aligned DWORD. Compute the number of bits |
| 266 | // we'll need to shift the input value (to the left) and a mask of the bits that will be set in |
| 267 | // the destination DWORD. |
| 268 | DWORD dwValueShift = dwOffset % kBitsPerDWORD; |
| 269 | DWORD dwValueMask = ((1U << dwLength) - 1) << dwValueShift; |
| 270 | |
| 271 | m_rgPackedFields[dwStartBlock] &= ~dwValueMask; // Zero the target bits |
| 272 | m_rgPackedFields[dwStartBlock] |= dwValue << dwValueShift; // Or in the new value (suitably shifted) |
| 273 | } |
| 274 | else |
| 275 | { |
| 276 | // Hard case: the new value is split across two DWORDs (two DWORDs is the max as the new value can |
| 277 | // be at most DWORD-sized itself). For simplicity we'll simply break this into two separate |
| 278 | // non-spanning sets. We can revisit this in the future if the perf is a problem. |
| 279 | DWORD dwInitialBits = kBitsPerDWORD - (dwOffset % kBitsPerDWORD); // Number of bits to set in the first DWORD |
| 280 | DWORD dwInitialMask = (1U << dwInitialBits) - 1; // Mask covering those value bits |
| 281 | |
| 282 | // Set the portion of the value residing in the first DWORD. |
| 283 | BitVectorSet(dwOffset, dwInitialBits, dwValue & dwInitialMask); |
| 284 | |
| 285 | // And then the remainder in the second DWORD. |
| 286 | BitVectorSet(dwOffset + dwInitialBits, dwLength - dwInitialBits, dwValue >> dwInitialBits); |
| 287 | } |
| 288 | |
| 289 | _ASSERTE(BitVectorGet(dwOffset, dwLength) == dwValue); |
| 290 | } |
| 291 | |
| 292 | // Get the dwLength bits at m_rgPackedFields + dwOffset bits. Value is zero-extended to DWORD size. |
| 293 | DWORD BitVectorGet(DWORD dwOffset, DWORD dwLength) |
| 294 | { |
| 295 | LIMITED_METHOD_DAC_CONTRACT; |
| 296 | |
| 297 | _ASSERTE(dwLength > 0 && dwLength <= kBitsPerDWORD); // Can get at most one DWORD at a time |
| 298 | |
| 299 | // Calculate the start and end naturally aligned DWORDs from which the value will come. |
| 300 | DWORD dwStartBlock = dwOffset / kBitsPerDWORD; |
| 301 | DWORD dwEndBlock = (dwOffset + dwLength - 1) / kBitsPerDWORD; |
| 302 | if (dwStartBlock == dwEndBlock) |
| 303 | { |
| 304 | // Easy case: the new value fits entirely within one aligned DWORD. Compute the number of bits |
| 305 | // we'll need to shift the extracted value (to the right) and a mask of the bits that will be |
| 306 | // extracted in the destination DWORD. |
| 307 | DWORD dwValueShift = dwOffset % kBitsPerDWORD; |
| 308 | DWORD dwValueMask = ((1U << dwLength) - 1) << dwValueShift; |
| 309 | |
| 310 | // Mask out the bits we want and shift them down into the bottom of the result DWORD. |
| 311 | return (m_rgPackedFields[dwStartBlock] & dwValueMask) >> dwValueShift; |
| 312 | } |
| 313 | else |
| 314 | { |
| 315 | // Hard case: the return value is split across two DWORDs (two DWORDs is the max as the new value |
| 316 | // can be at most DWORD-sized itself). For simplicity we'll simply break this into two separate |
| 317 | // non-spanning gets and stitch the result together from that. We can revisit this in the future |
| 318 | // if the perf is a problem. |
| 319 | DWORD dwInitialBits = kBitsPerDWORD - (dwOffset % kBitsPerDWORD); // Number of bits to get in the first DWORD |
| 320 | DWORD dwReturn; |
| 321 | |
| 322 | // Get the initial (low-order) bits from the first DWORD. |
| 323 | dwReturn = BitVectorGet(dwOffset, dwInitialBits); |
| 324 | |
| 325 | // Get the remaining bits from the second DWORD. These bits will need to be shifted to the left |
| 326 | // (past the bits we've already read) before being OR'd into the result. |
| 327 | dwReturn |= BitVectorGet(dwOffset + dwInitialBits, dwLength - dwInitialBits) << dwInitialBits; |
| 328 | |
| 329 | return dwReturn; |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | #ifdef _DEBUG |
| 334 | DWORD m_rgDebugUnpackedFields[FIELD_COUNT]; // A copy of the unpacked fields so we can validate |
| 335 | // packed reads |
| 336 | bool m_fFieldsPacked; // The current packed/unpacked state so we can check |
| 337 | // the right methods are being called |
| 338 | #endif // _DEBUG |
| 339 | |
| 340 | union |
| 341 | { |
| 342 | DWORD m_rgUnpackedFields[FIELD_COUNT]; // The fields in their unpacked state |
| 343 | DWORD m_rgPackedFields[1]; // The first DWORD block of fields in the packed state |
| 344 | }; |
| 345 | }; |
| 346 | |