1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | // |
5 | |
6 | // |
7 | |
8 | // |
9 | // Provides a mechanism to store an array of DWORD-typed fields in a space-efficient manner. There are some |
10 | // caveats: |
11 | // 1) Fields can be written and read in an uncompressed form (a simple array of DWORD values) until the |
12 | // PackFields() method is invoked. Once this method has been invoked (and returned true) fields have been |
13 | // compacted and must not be modified again. That is, the primary usage of this class is to store a set of |
14 | // initialized-once fields. |
15 | // 2) The compaction algorithm relies on the fields containing small values (such as counts). Avoid storing |
16 | // fields that have special sentinel values (such as all bits set) which will frequently set high order |
17 | // bits. |
18 | // 3) An instance of PackedDWORDFields will take up a fixed quantity of memory equivalent to an array of |
19 | // DWORD fields. If PackFields() returns true then the fields values frozen at the time of the call have |
20 | // been compressed into a fewer number of bytes in-place. This smaller size will always be a multiple of |
21 | // sizeof(DWORD) in length and is reported by GetPackedSize(). If a PackedDWORDFields structure is being |
22 | // declared as a field inside another structure it is typically wise to place the field last to take |
23 | // advantage of this size reduction (e.g. when saving the outer structure into an ngen image). If |
24 | // PackFields() returns false then the fields remain unpacked and unchanged. |
25 | // 4) The caller retains the responsibility of recording whether an instance of PackedDWORDFields is in the |
26 | // packed or unpacked state. This is important since incorrect behavior will result if the wrong methods |
27 | // are used for the current state (e.g. calling GetUnpackedField() on a packed instance). This is not done |
28 | // automatically since there are no bits free to store the state. However, under a debug build correct |
29 | // usage will be checked (at the expensive of extra storage space). |
30 | // 5) The space saving made come at a runtime CPU cost to access the fields. Do not use this mechanism to |
31 | // compact fields that must be read on a perfomance critical path. If unsure, measure the performance of |
32 | // this solution before committing to it. |
33 | // |
34 | // ============================================================================ |
35 | |
36 | // Describe an array of FIELD_COUNT DWORDs. Each entry is addressed via a zero-based index and is expected to |
37 | // frequently contain a small integer and remain frozen after initialization. |
38 | template <DWORD FIELD_COUNT> |
39 | class PackedDWORDFields |
40 | { |
41 | // Some constants to make the code a little more readable. |
42 | enum Constants |
43 | { |
44 | kMaxLengthBits = 5, // Number of bits needed to express the maximum length of a field (32-bits) |
45 | kBitsPerDWORD = 32, // Number of bits in a DWORD |
46 | }; |
47 | |
48 | public: |
49 | // Fields are all initialized to zero. |
50 | PackedDWORDFields() |
51 | { |
52 | LIMITED_METHOD_DAC_CONTRACT; |
53 | |
54 | memset(m_rgUnpackedFields, 0, sizeof(m_rgUnpackedFields)); |
55 | #ifdef _DEBUG |
56 | memset(m_rgDebugUnpackedFields, 0, sizeof(m_rgDebugUnpackedFields)); |
57 | m_fFieldsPacked = false; |
58 | #endif // _DEBUG |
59 | } |
60 | |
61 | // Get the value of the given field when the structure is in its unpacked state. |
62 | DWORD GetUnpackedField(DWORD dwFieldIndex) |
63 | { |
64 | LIMITED_METHOD_DAC_CONTRACT; |
65 | |
66 | _ASSERTE(dwFieldIndex < FIELD_COUNT); |
67 | _ASSERTE(!m_fFieldsPacked); |
68 | |
69 | return m_rgUnpackedFields[dwFieldIndex]; |
70 | } |
71 | |
72 | // Set the value of the given field when the structure is in its unpacked state. Setting field values |
73 | // multiple times is allowed but only until a successful call to PackFields is made. |
74 | void SetUnpackedField(DWORD dwFieldIndex, DWORD dwValue) |
75 | { |
76 | LIMITED_METHOD_CONTRACT; |
77 | |
78 | _ASSERTE(dwFieldIndex < FIELD_COUNT); |
79 | _ASSERTE(!m_fFieldsPacked); |
80 | |
81 | m_rgUnpackedFields[dwFieldIndex] = dwValue; |
82 | |
83 | #ifdef _DEBUG |
84 | m_rgDebugUnpackedFields[dwFieldIndex] = dwValue; |
85 | #endif // _DEBUG |
86 | } |
87 | |
88 | // Attempt to optimize the set of fields given their current values. Returns false if compaction wouldn't |
89 | // achieve any space savings (in this case the structure remains in the unpacked state and the caller can |
90 | // continue to use the *UnpackedField methods above or even re-attempt PackFields() with different field |
91 | // values). If true is returned the data has been compacted into a smaller amount of space (this will |
92 | // always be a multiple of sizeof(DWORD) in size). This size can be queried using GetPackedSize() below. |
93 | // Once PackFields() has returned true fields can no longer be modified and field values must be retrieved |
94 | // via GetPackedField() rather than GetUnpackedField(). |
95 | bool PackFields() |
96 | { |
97 | CONTRACTL |
98 | { |
99 | NOTHROW; |
100 | GC_NOTRIGGER; |
101 | MODE_ANY; |
102 | SO_INTOLERANT; |
103 | } |
104 | CONTRACTL_END; |
105 | |
106 | // Can't re-pack a packed structure. |
107 | _ASSERTE(!m_fFieldsPacked); |
108 | |
109 | // First compute the number of bits of space we'd need for a packed representation. Do this before |
110 | // making any changes since sometimes we'd end up expanding the data instead and in this case we wish |
111 | // to return false and make no updates to the structure. |
112 | |
113 | // There's a fixed overhead of kMaxLengthBits for each field (we store the packed fields as a |
114 | // bit-stream that alternates between a field length (of kMaxLengthBits) followed by a variable length |
115 | // bitfield containing the field value. |
116 | DWORD dwTotalPackedBits = FIELD_COUNT * kMaxLengthBits; |
117 | |
118 | // For each field calculate excatly how many bits we'd need to store the field value and add this to |
119 | // the total. |
120 | for (DWORD i = 0; i < FIELD_COUNT; i++) |
121 | dwTotalPackedBits += BitsRequired(m_rgUnpackedFields[i]); |
122 | |
123 | // Now we have the total is it smaller than a simple array of DWORDs? |
124 | if (dwTotalPackedBits >= (FIELD_COUNT * kBitsPerDWORD)) |
125 | return false; |
126 | |
127 | // Compaction will save us space. We're committed to implementing that compaction now. |
128 | |
129 | // Work from a copy of the unpacked fields since we're about to start modifying the space in which |
130 | // they're currently stored. |
131 | DWORD rgUnpackedFields[FIELD_COUNT]; |
132 | memcpy(rgUnpackedFields, m_rgUnpackedFields, sizeof(rgUnpackedFields)); |
133 | |
134 | // Start writing a stream of bits. For each field write a fixed sized header describing the number of |
135 | // bits required to express the field followed by the field value itself. |
136 | DWORD dwOffset = 0; |
137 | for (DWORD i = 0; i < FIELD_COUNT; i++) |
138 | { |
139 | // Find the minimal number of bits required to encode the current field's value. |
140 | DWORD dwFieldLength = BitsRequired(rgUnpackedFields[i]); |
141 | _ASSERTE(dwFieldLength > 0 && dwFieldLength <= kBitsPerDWORD); |
142 | |
143 | // Write the size field. Note that we store the size biased by one. That is, a field length of one |
144 | // is encoded as zero. We do this so we can express a range of field sizes from 1 through 32, |
145 | // emcompassing the worst case scenario (a full 32 bits). It comes at the cost of not being able |
146 | // to encode zero-valued fields with zero bits. Is this is deemed an important optimization in the |
147 | // future we could always given up on a simple linear mapping of the size field and use a lookup |
148 | // table to map values encoded into the real sizes. Experiments with EEClass packed fields over |
149 | // mscorlib show that this currently doesn't yield us much benefit, primarily due to the DWORD |
150 | // round-up size semantic, which implies we'd need a lot more optimization than this to reduce the |
151 | // average structure size below the next DWORD threshhold. |
152 | BitVectorSet(dwOffset, kMaxLengthBits, dwFieldLength - 1); |
153 | dwOffset += kMaxLengthBits; |
154 | |
155 | // Write the field value itself. |
156 | BitVectorSet(dwOffset, dwFieldLength, rgUnpackedFields[i]); |
157 | dwOffset += dwFieldLength; |
158 | } |
159 | |
160 | #ifdef _DEBUG |
161 | m_fFieldsPacked = true; |
162 | #endif // _DEBUG |
163 | |
164 | // Compaction was successful. |
165 | return true; |
166 | } |
167 | |
168 | // Return the size in bytes of a compacted structure (it is illegal to call this on an uncompacted |
169 | // structure). |
170 | DWORD GetPackedSize() |
171 | { |
172 | LIMITED_METHOD_DAC_CONTRACT; |
173 | |
174 | _ASSERTE(m_fFieldsPacked); |
175 | |
176 | // Walk the field stream reading header (which are fixed size) and then using the value of the headers |
177 | // to skip the field value. |
178 | DWORD cBits = 0; |
179 | for (DWORD i = 0; i < FIELD_COUNT; i++) |
180 | cBits += kMaxLengthBits + BitVectorGet(cBits, kMaxLengthBits) + 1; // +1 since size is [1,32] not [0,31] |
181 | |
182 | // Compute the number of DWORDs needed to store the bits of the encoding. |
183 | // static_cast would not be necessary if ALIGN_UP were templated like FitsIn. |
184 | DWORD cDWORDs = static_cast<DWORD>(ALIGN_UP(cBits, kBitsPerDWORD)) / kBitsPerDWORD; |
185 | |
186 | // Return the total structure size. |
187 | return offsetof(PackedDWORDFields<FIELD_COUNT>, m_rgPackedFields) + (cDWORDs * sizeof(DWORD)); |
188 | } |
189 | |
190 | // Get the value of a packed field. Illegal to call this on an uncompacted structure. |
191 | DWORD GetPackedField(DWORD dwFieldIndex) |
192 | { |
193 | CONTRACTL |
194 | { |
195 | NOTHROW; |
196 | GC_NOTRIGGER; |
197 | MODE_ANY; |
198 | SUPPORTS_DAC; |
199 | SO_TOLERANT; |
200 | } |
201 | CONTRACTL_END; |
202 | |
203 | _ASSERTE(dwFieldIndex < FIELD_COUNT); |
204 | _ASSERTE(m_fFieldsPacked); |
205 | |
206 | // Walk past all the predecessor fields. |
207 | DWORD dwOffset = 0; |
208 | for (DWORD i = 0; i < dwFieldIndex; i++) |
209 | dwOffset += kMaxLengthBits + BitVectorGet(dwOffset, kMaxLengthBits) + 1; // +1 since size is [1,32] not [0,31] |
210 | |
211 | // The next kMaxLengthBits bits contain the length in bits of the field we want (-1 due to the way we |
212 | // encode the length). |
213 | DWORD dwFieldLength = BitVectorGet(dwOffset, kMaxLengthBits) + 1; |
214 | dwOffset += kMaxLengthBits; |
215 | |
216 | // Grab the field value. |
217 | DWORD dwReturn = BitVectorGet(dwOffset, dwFieldLength); |
218 | |
219 | // On debug builds ensure the encoded field value is the same as the original unpacked version. |
220 | _ASSERTE(dwReturn == m_rgDebugUnpackedFields[dwFieldIndex]); |
221 | return dwReturn; |
222 | } |
223 | |
224 | private: |
225 | // Return the minimum number of bits required to encode a DWORD value by stripping out the |
226 | // most-significant leading zero bits). Returns a value between 1 and 32 inclusive (we never encode |
227 | // anything with zero bits). |
228 | DWORD BitsRequired(DWORD dwValue) |
229 | { |
230 | LIMITED_METHOD_CONTRACT; |
231 | |
232 | // Starting with a bit-mask of the most significant bit and iterating over masks for successively less |
233 | // significant bits, stop as soon as the mask co-incides with a set bit in the value. Simultaneously |
234 | // we're counting down the bits required to express the range of values implied by seeing the |
235 | // corresponding bit set in the value (e.g. when we're testing the high bit we know we'd need 32-bits |
236 | // to encode the range of values that have this bit set). Stop when we get to one bit (we never return |
237 | // 0 bits required, even for an input value of 0). |
238 | DWORD dwMask = 0x80000000; |
239 | DWORD cBits = 32; |
240 | while (cBits > 1) |
241 | { |
242 | if (dwValue & dwMask) |
243 | return cBits; |
244 | |
245 | dwMask >>= 1; |
246 | cBits--; |
247 | } |
248 | |
249 | return 1; |
250 | } |
251 | |
252 | // Set the dwLength bits at m_rgPackedFields + dwOffset bits to the value dwValue. |
253 | void BitVectorSet(DWORD dwOffset, DWORD dwLength, DWORD dwValue) |
254 | { |
255 | LIMITED_METHOD_CONTRACT; |
256 | |
257 | _ASSERTE(dwLength > 0 && dwLength <= kBitsPerDWORD); // Can set at most one DWORD at a time |
258 | _ASSERTE((dwLength == kBitsPerDWORD) || (dwValue < (1U << dwLength))); // Value had better fit in the given length |
259 | |
260 | // Calculate the start and end naturally aligned DWORDs into which the value will go. |
261 | DWORD dwStartBlock = dwOffset / kBitsPerDWORD; |
262 | DWORD dwEndBlock = (dwOffset + dwLength - 1) / kBitsPerDWORD; |
263 | if (dwStartBlock == dwEndBlock) |
264 | { |
265 | // Easy case: the new value fits entirely within one aligned DWORD. Compute the number of bits |
266 | // we'll need to shift the input value (to the left) and a mask of the bits that will be set in |
267 | // the destination DWORD. |
268 | DWORD dwValueShift = dwOffset % kBitsPerDWORD; |
269 | DWORD dwValueMask = ((1U << dwLength) - 1) << dwValueShift; |
270 | |
271 | m_rgPackedFields[dwStartBlock] &= ~dwValueMask; // Zero the target bits |
272 | m_rgPackedFields[dwStartBlock] |= dwValue << dwValueShift; // Or in the new value (suitably shifted) |
273 | } |
274 | else |
275 | { |
276 | // Hard case: the new value is split across two DWORDs (two DWORDs is the max as the new value can |
277 | // be at most DWORD-sized itself). For simplicity we'll simply break this into two separate |
278 | // non-spanning sets. We can revisit this in the future if the perf is a problem. |
279 | DWORD dwInitialBits = kBitsPerDWORD - (dwOffset % kBitsPerDWORD); // Number of bits to set in the first DWORD |
280 | DWORD dwInitialMask = (1U << dwInitialBits) - 1; // Mask covering those value bits |
281 | |
282 | // Set the portion of the value residing in the first DWORD. |
283 | BitVectorSet(dwOffset, dwInitialBits, dwValue & dwInitialMask); |
284 | |
285 | // And then the remainder in the second DWORD. |
286 | BitVectorSet(dwOffset + dwInitialBits, dwLength - dwInitialBits, dwValue >> dwInitialBits); |
287 | } |
288 | |
289 | _ASSERTE(BitVectorGet(dwOffset, dwLength) == dwValue); |
290 | } |
291 | |
292 | // Get the dwLength bits at m_rgPackedFields + dwOffset bits. Value is zero-extended to DWORD size. |
293 | DWORD BitVectorGet(DWORD dwOffset, DWORD dwLength) |
294 | { |
295 | LIMITED_METHOD_DAC_CONTRACT; |
296 | |
297 | _ASSERTE(dwLength > 0 && dwLength <= kBitsPerDWORD); // Can get at most one DWORD at a time |
298 | |
299 | // Calculate the start and end naturally aligned DWORDs from which the value will come. |
300 | DWORD dwStartBlock = dwOffset / kBitsPerDWORD; |
301 | DWORD dwEndBlock = (dwOffset + dwLength - 1) / kBitsPerDWORD; |
302 | if (dwStartBlock == dwEndBlock) |
303 | { |
304 | // Easy case: the new value fits entirely within one aligned DWORD. Compute the number of bits |
305 | // we'll need to shift the extracted value (to the right) and a mask of the bits that will be |
306 | // extracted in the destination DWORD. |
307 | DWORD dwValueShift = dwOffset % kBitsPerDWORD; |
308 | DWORD dwValueMask = ((1U << dwLength) - 1) << dwValueShift; |
309 | |
310 | // Mask out the bits we want and shift them down into the bottom of the result DWORD. |
311 | return (m_rgPackedFields[dwStartBlock] & dwValueMask) >> dwValueShift; |
312 | } |
313 | else |
314 | { |
315 | // Hard case: the return value is split across two DWORDs (two DWORDs is the max as the new value |
316 | // can be at most DWORD-sized itself). For simplicity we'll simply break this into two separate |
317 | // non-spanning gets and stitch the result together from that. We can revisit this in the future |
318 | // if the perf is a problem. |
319 | DWORD dwInitialBits = kBitsPerDWORD - (dwOffset % kBitsPerDWORD); // Number of bits to get in the first DWORD |
320 | DWORD dwReturn; |
321 | |
322 | // Get the initial (low-order) bits from the first DWORD. |
323 | dwReturn = BitVectorGet(dwOffset, dwInitialBits); |
324 | |
325 | // Get the remaining bits from the second DWORD. These bits will need to be shifted to the left |
326 | // (past the bits we've already read) before being OR'd into the result. |
327 | dwReturn |= BitVectorGet(dwOffset + dwInitialBits, dwLength - dwInitialBits) << dwInitialBits; |
328 | |
329 | return dwReturn; |
330 | } |
331 | } |
332 | |
333 | #ifdef _DEBUG |
334 | DWORD m_rgDebugUnpackedFields[FIELD_COUNT]; // A copy of the unpacked fields so we can validate |
335 | // packed reads |
336 | bool m_fFieldsPacked; // The current packed/unpacked state so we can check |
337 | // the right methods are being called |
338 | #endif // _DEBUG |
339 | |
340 | union |
341 | { |
342 | DWORD m_rgUnpackedFields[FIELD_COUNT]; // The fields in their unpacked state |
343 | DWORD m_rgPackedFields[1]; // The first DWORD block of fields in the packed state |
344 | }; |
345 | }; |
346 | |