1 | /* |
2 | * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #ifndef SHARE_CLASSFILE_COMPACTHASHTABLE_HPP |
26 | #define SHARE_CLASSFILE_COMPACTHASHTABLE_HPP |
27 | |
28 | #include "oops/array.hpp" |
29 | #include "oops/symbol.hpp" |
30 | #include "utilities/growableArray.hpp" |
31 | |
32 | |
33 | template < |
34 | typename K, |
35 | typename V, |
36 | V (*DECODE)(address base_address, u4 offset), |
37 | bool (*EQUALS)(V value, K key, int len) |
38 | > |
39 | class CompactHashtable; |
40 | class NumberSeq; |
41 | class SimpleCompactHashtable; |
42 | class SerializeClosure; |
43 | |
44 | // Stats for symbol tables in the CDS archive |
45 | class CompactHashtableStats { |
46 | public: |
47 | int hashentry_count; |
48 | int hashentry_bytes; |
49 | int bucket_count; |
50 | int bucket_bytes; |
51 | }; |
52 | |
53 | #if INCLUDE_CDS |
54 | ///////////////////////////////////////////////////////////////////////// |
55 | // |
56 | // The compact hash table writer. Used at dump time for writing out |
57 | // the compact table to the shared archive. |
58 | // |
59 | // At dump time, the CompactHashtableWriter obtains all entries from the |
60 | // symbol/string table and adds them to a new temporary hash table. The hash |
61 | // table size (number of buckets) is calculated using |
62 | // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket |
63 | // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option. |
64 | // 4 is chosen because it produces smaller sized bucket on average for |
65 | // faster lookup. It also has relatively small number of empty buckets and |
66 | // good distribution of the entries. |
67 | // |
68 | // We use a simple hash function (hash % num_bucket) for the table. |
69 | // The new table is compacted when written out. Please see comments |
70 | // above the CompactHashtable class for the table layout detail. The bucket |
71 | // offsets are written to the archive as part of the compact table. The |
72 | // bucket offset is encoded in the low 30-bit (0-29) and the bucket type |
73 | // (regular or compact) are encoded in bit[31, 30]. For buckets with more |
74 | // than one entry, both hash and entry offset are written to the |
75 | // table. For buckets with only one entry, only the entry offset is written |
76 | // to the table and the buckets are tagged as compact in their type bits. |
77 | // Buckets without entry are skipped from the table. Their offsets are |
78 | // still written out for faster lookup. |
79 | // |
80 | class CompactHashtableWriter: public StackObj { |
81 | public: |
82 | class Entry { |
83 | unsigned int _hash; |
84 | u4 _value; |
85 | |
86 | public: |
87 | Entry() {} |
88 | Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {} |
89 | |
90 | u4 value() { |
91 | return _value; |
92 | } |
93 | unsigned int hash() { |
94 | return _hash; |
95 | } |
96 | |
97 | bool operator==(const CompactHashtableWriter::Entry& other) { |
98 | return (_value == other._value && _hash == other._hash); |
99 | } |
100 | }; // class CompactHashtableWriter::Entry |
101 | |
102 | private: |
103 | int _num_entries_written; |
104 | int _num_buckets; |
105 | int _num_empty_buckets; |
106 | int _num_value_only_buckets; |
107 | int _num_other_buckets; |
108 | GrowableArray<Entry>** _buckets; |
109 | CompactHashtableStats* _stats; |
110 | Array<u4>* _compact_buckets; |
111 | Array<u4>* _compact_entries; |
112 | |
113 | public: |
114 | // This is called at dump-time only |
115 | CompactHashtableWriter(int num_entries, CompactHashtableStats* stats); |
116 | ~CompactHashtableWriter(); |
117 | |
118 | void add(unsigned int hash, u4 value); |
119 | |
120 | private: |
121 | void allocate_table(); |
122 | void dump_table(NumberSeq* summary); |
123 | static int calculate_num_buckets(int num_entries) { |
124 | int num_buckets = num_entries / SharedSymbolTableBucketSize; |
125 | // calculation of num_buckets can result in zero buckets, we need at least one |
126 | return (num_buckets < 1) ? 1 : num_buckets; |
127 | } |
128 | |
129 | public: |
130 | void dump(SimpleCompactHashtable *cht, const char* table_name); |
131 | |
132 | static size_t estimate_size(int num_entries); |
133 | }; |
134 | #endif // INCLUDE_CDS |
135 | |
136 | #define REGULAR_BUCKET_TYPE 0 |
137 | #define VALUE_ONLY_BUCKET_TYPE 1 |
138 | #define TABLEEND_BUCKET_TYPE 3 |
139 | #define BUCKET_OFFSET_MASK 0x3FFFFFFF |
140 | #define BUCKET_OFFSET(info) ((info) & BUCKET_OFFSET_MASK) |
141 | #define BUCKET_TYPE_SHIFT 30 |
142 | #define BUCKET_TYPE(info) (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT) |
143 | #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK)) |
144 | |
145 | ///////////////////////////////////////////////////////////////////////////// |
146 | // |
147 | // CompactHashtable is used to store the CDS archive's symbol/string tables. |
148 | // |
149 | // Because these tables are read-only (no entries can be added/deleted) at run-time |
150 | // and tend to have large number of entries, we try to minimize the footprint |
151 | // cost per entry. |
152 | // |
153 | // The CompactHashtable is split into two arrays |
154 | // |
155 | // u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset |
156 | // u4 entries[<variable size>] |
157 | // |
158 | // The size of buckets[] is 'num_buckets + 1'. Each entry of |
159 | // buckets[] is a 32-bit encoding of the bucket type and bucket offset, |
160 | // with the type in the left-most 2-bit and offset in the remaining 30-bit. |
161 | // The last entry is a special type. It contains the end of the last |
162 | // bucket. |
163 | // |
164 | // There are two types of buckets, regular buckets and value_only buckets. The |
165 | // value_only buckets have '01' in their highest 2-bit, and regular buckets have |
166 | // '00' in their highest 2-bit. |
167 | // |
168 | // For normal buckets, each entry is 8 bytes in the entries[]: |
169 | // u4 hash; /* symbol/string hash */ |
170 | // union { |
171 | // u4 offset; /* Symbol* sym = (Symbol*)(base_address + offset) */ |
172 | // narrowOop str; /* String narrowOop encoding */ |
173 | // } |
174 | // |
175 | // |
176 | // For value_only buckets, each entry has only the 4-byte 'offset' in the entries[]. |
177 | // |
178 | // Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code |
179 | // is skipped. |
180 | // buckets[0, 4, 5, ....] |
181 | // | | | |
182 | // | | +---+ |
183 | // | | | |
184 | // | +----+ | |
185 | // v v v |
186 | // entries[H,O,H,O,O,H,O,H,O.....] |
187 | // |
188 | // See CompactHashtable::lookup() for how the table is searched at runtime. |
189 | // See CompactHashtableWriter::dump() for how the table is written at CDS |
190 | // dump time. |
191 | // |
192 | class SimpleCompactHashtable { |
193 | protected: |
194 | address _base_address; |
195 | u4 _bucket_count; |
196 | u4 _entry_count; |
197 | u4* _buckets; |
198 | u4* _entries; |
199 | |
200 | public: |
201 | SimpleCompactHashtable() { |
202 | _entry_count = 0; |
203 | _bucket_count = 0; |
204 | _buckets = 0; |
205 | _entries = 0; |
206 | } |
207 | |
208 | void reset() { |
209 | _bucket_count = 0; |
210 | _entry_count = 0; |
211 | _buckets = 0; |
212 | _entries = 0; |
213 | } |
214 | |
215 | void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries); |
216 | |
217 | // Read/Write the table's header from/to the CDS archive |
218 | void (SerializeClosure* soc) NOT_CDS_RETURN; |
219 | |
220 | inline bool empty() { |
221 | return (_entry_count == 0); |
222 | } |
223 | |
224 | static size_t (); |
225 | }; |
226 | |
227 | template < |
228 | typename K, |
229 | typename V, |
230 | V (*DECODE)(address base_address, u4 offset), |
231 | bool (*EQUALS)(V value, K key, int len) |
232 | > |
233 | class CompactHashtable : public SimpleCompactHashtable { |
234 | friend class VMStructs; |
235 | |
236 | V decode(u4 offset) const { |
237 | return DECODE(_base_address, offset); |
238 | } |
239 | |
240 | public: |
241 | // Lookup a value V from the compact table using key K |
242 | inline V lookup(K key, unsigned int hash, int len) const { |
243 | if (_entry_count > 0) { |
244 | int index = hash % _bucket_count; |
245 | u4 bucket_info = _buckets[index]; |
246 | u4 bucket_offset = BUCKET_OFFSET(bucket_info); |
247 | int bucket_type = BUCKET_TYPE(bucket_info); |
248 | u4* entry = _entries + bucket_offset; |
249 | |
250 | if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { |
251 | V value = decode(entry[0]); |
252 | if (EQUALS(value, key, len)) { |
253 | return value; |
254 | } |
255 | } else { |
256 | // This is a regular bucket, which has more than one |
257 | // entries. Each entry is a pair of entry (hash, offset). |
258 | // Seek until the end of the bucket. |
259 | u4* entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]); |
260 | while (entry < entry_max) { |
261 | unsigned int h = (unsigned int)(entry[0]); |
262 | if (h == hash) { |
263 | V value = decode(entry[1]); |
264 | if (EQUALS(value, key, len)) { |
265 | return value; |
266 | } |
267 | } |
268 | entry += 2; |
269 | } |
270 | } |
271 | } |
272 | return NULL; |
273 | } |
274 | |
275 | template <class ITER> |
276 | inline void iterate(ITER* iter) const { |
277 | for (u4 i = 0; i < _bucket_count; i++) { |
278 | u4 bucket_info = _buckets[i]; |
279 | u4 bucket_offset = BUCKET_OFFSET(bucket_info); |
280 | int bucket_type = BUCKET_TYPE(bucket_info); |
281 | u4* entry = _entries + bucket_offset; |
282 | |
283 | if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { |
284 | iter->do_value(decode(entry[0])); |
285 | } else { |
286 | u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]); |
287 | while (entry < entry_max) { |
288 | iter->do_value(decode(entry[1])); |
289 | entry += 2; |
290 | } |
291 | } |
292 | } |
293 | } |
294 | |
295 | void print_table_statistics(outputStream* st, const char* name) { |
296 | st->print_cr("%s statistics:" , name); |
297 | int total_entries = 0; |
298 | int max_bucket = 0; |
299 | for (u4 i = 0; i < _bucket_count; i++) { |
300 | u4 bucket_info = _buckets[i]; |
301 | int bucket_type = BUCKET_TYPE(bucket_info); |
302 | int bucket_size; |
303 | |
304 | if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { |
305 | bucket_size = 1; |
306 | } else { |
307 | bucket_size = (BUCKET_OFFSET(_buckets[i + 1]) - BUCKET_OFFSET(bucket_info)) / 2; |
308 | } |
309 | total_entries += bucket_size; |
310 | if (max_bucket < bucket_size) { |
311 | max_bucket = bucket_size; |
312 | } |
313 | } |
314 | st->print_cr("Number of buckets : %9d" , _bucket_count); |
315 | st->print_cr("Number of entries : %9d" , total_entries); |
316 | st->print_cr("Maximum bucket size : %9d" , max_bucket); |
317 | } |
318 | }; |
319 | |
320 | //////////////////////////////////////////////////////////////////////// |
321 | // |
322 | // OffsetCompactHashtable -- This is used to store many types of objects |
323 | // in the CDS archive. On 64-bit platforms, we save space by using a 32-bit |
324 | // offset from the CDS base address. |
325 | |
326 | template <typename V> |
327 | inline V read_value_from_compact_hashtable(address base_address, u4 offset) { |
328 | return (V)(base_address + offset); |
329 | } |
330 | |
331 | template < |
332 | typename K, |
333 | typename V, |
334 | bool (*EQUALS)(V value, K key, int len) |
335 | > |
336 | class OffsetCompactHashtable : public CompactHashtable< |
337 | K, V, read_value_from_compact_hashtable<V>, EQUALS> { |
338 | }; |
339 | |
340 | |
341 | //////////////////////////////////////////////////////////////////////// |
342 | // |
343 | // Read/Write the contents of a hashtable textual dump (created by |
344 | // SymbolTable::dump and StringTable::dump). |
345 | // Because the dump file may be big (hundred of MB in extreme cases), |
346 | // we use mmap for fast access when reading it. |
347 | // |
348 | class HashtableTextDump { |
349 | int _fd; |
350 | const char* _base; |
351 | const char* _p; |
352 | const char* _end; |
353 | const char* _filename; |
354 | size_t _size; |
355 | int _prefix_type; |
356 | int _line_no; |
357 | public: |
358 | HashtableTextDump(const char* filename); |
359 | ~HashtableTextDump(); |
360 | |
361 | enum { |
362 | SymbolPrefix = 1 << 0, |
363 | StringPrefix = 1 << 1, |
364 | Unknown = 1 << 2 |
365 | }; |
366 | |
367 | void quit(const char* err, const char* msg); |
368 | |
369 | inline int remain() { |
370 | return (int)(_end - _p); |
371 | } |
372 | int last_line_no() { |
373 | return _line_no - 1; |
374 | } |
375 | |
376 | void corrupted(const char *p, const char *msg); |
377 | |
378 | inline void corrupted_if(bool cond, const char *msg) { |
379 | if (cond) { |
380 | corrupted(_p, msg); |
381 | } |
382 | } |
383 | |
384 | bool skip_newline(); |
385 | int skip(char must_be_char); |
386 | void skip_past(char c); |
387 | void check_version(const char* ver); |
388 | |
389 | inline void get_num(char delim, int *num) { |
390 | const char* p = _p; |
391 | const char* end = _end; |
392 | u8 n = 0; |
393 | |
394 | while (p < end) { |
395 | char c = *p++; |
396 | if ('0' <= c && c <= '9') { |
397 | n = n * 10 + (c - '0'); |
398 | if (n > (u8)INT_MAX) { |
399 | corrupted(_p, "Num overflow" ); |
400 | } |
401 | } else if (c == delim) { |
402 | _p = p; |
403 | *num = (int)n; |
404 | return; |
405 | } else { |
406 | // Not [0-9], not 'delim' |
407 | corrupted(_p, "Unrecognized format" );; |
408 | } |
409 | } |
410 | |
411 | corrupted(_end, "Incorrect format" ); |
412 | ShouldNotReachHere(); |
413 | } |
414 | |
415 | void scan_prefix_type(); |
416 | int scan_prefix(int* utf8_length); |
417 | int scan_string_prefix(); |
418 | int scan_symbol_prefix(); |
419 | |
420 | jchar unescape(const char* from, const char* end, int count); |
421 | void get_utf8(char* utf8_buffer, int utf8_length); |
422 | static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length); |
423 | }; |
424 | |
425 | #endif // SHARE_CLASSFILE_COMPACTHASHTABLE_HPP |
426 | |