1/*
2 * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef SHARE_CLASSFILE_COMPACTHASHTABLE_HPP
26#define SHARE_CLASSFILE_COMPACTHASHTABLE_HPP
27
28#include "oops/array.hpp"
29#include "oops/symbol.hpp"
30#include "utilities/growableArray.hpp"
31
32
33template <
34 typename K,
35 typename V,
36 V (*DECODE)(address base_address, u4 offset),
37 bool (*EQUALS)(V value, K key, int len)
38 >
39class CompactHashtable;
40class NumberSeq;
41class SimpleCompactHashtable;
42class SerializeClosure;
43
44// Stats for symbol tables in the CDS archive
45class CompactHashtableStats {
46public:
47 int hashentry_count;
48 int hashentry_bytes;
49 int bucket_count;
50 int bucket_bytes;
51};
52
53#if INCLUDE_CDS
54/////////////////////////////////////////////////////////////////////////
55//
56// The compact hash table writer. Used at dump time for writing out
57// the compact table to the shared archive.
58//
59// At dump time, the CompactHashtableWriter obtains all entries from the
60// symbol/string table and adds them to a new temporary hash table. The hash
61// table size (number of buckets) is calculated using
62// '(num_entries + bucket_size - 1) / bucket_size'. The default bucket
63// size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.
64// 4 is chosen because it produces smaller sized bucket on average for
65// faster lookup. It also has relatively small number of empty buckets and
66// good distribution of the entries.
67//
68// We use a simple hash function (hash % num_bucket) for the table.
69// The new table is compacted when written out. Please see comments
70// above the CompactHashtable class for the table layout detail. The bucket
71// offsets are written to the archive as part of the compact table. The
72// bucket offset is encoded in the low 30-bit (0-29) and the bucket type
73// (regular or compact) are encoded in bit[31, 30]. For buckets with more
74// than one entry, both hash and entry offset are written to the
75// table. For buckets with only one entry, only the entry offset is written
76// to the table and the buckets are tagged as compact in their type bits.
77// Buckets without entry are skipped from the table. Their offsets are
78// still written out for faster lookup.
79//
80class CompactHashtableWriter: public StackObj {
81public:
82 class Entry {
83 unsigned int _hash;
84 u4 _value;
85
86 public:
87 Entry() {}
88 Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {}
89
90 u4 value() {
91 return _value;
92 }
93 unsigned int hash() {
94 return _hash;
95 }
96
97 bool operator==(const CompactHashtableWriter::Entry& other) {
98 return (_value == other._value && _hash == other._hash);
99 }
100 }; // class CompactHashtableWriter::Entry
101
102private:
103 int _num_entries_written;
104 int _num_buckets;
105 int _num_empty_buckets;
106 int _num_value_only_buckets;
107 int _num_other_buckets;
108 GrowableArray<Entry>** _buckets;
109 CompactHashtableStats* _stats;
110 Array<u4>* _compact_buckets;
111 Array<u4>* _compact_entries;
112
113public:
114 // This is called at dump-time only
115 CompactHashtableWriter(int num_entries, CompactHashtableStats* stats);
116 ~CompactHashtableWriter();
117
118 void add(unsigned int hash, u4 value);
119
120private:
121 void allocate_table();
122 void dump_table(NumberSeq* summary);
123 static int calculate_num_buckets(int num_entries) {
124 int num_buckets = num_entries / SharedSymbolTableBucketSize;
125 // calculation of num_buckets can result in zero buckets, we need at least one
126 return (num_buckets < 1) ? 1 : num_buckets;
127 }
128
129public:
130 void dump(SimpleCompactHashtable *cht, const char* table_name);
131
132 static size_t estimate_size(int num_entries);
133};
134#endif // INCLUDE_CDS
135
136#define REGULAR_BUCKET_TYPE 0
137#define VALUE_ONLY_BUCKET_TYPE 1
138#define TABLEEND_BUCKET_TYPE 3
139#define BUCKET_OFFSET_MASK 0x3FFFFFFF
140#define BUCKET_OFFSET(info) ((info) & BUCKET_OFFSET_MASK)
141#define BUCKET_TYPE_SHIFT 30
142#define BUCKET_TYPE(info) (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)
143#define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK))
144
145/////////////////////////////////////////////////////////////////////////////
146//
147// CompactHashtable is used to store the CDS archive's symbol/string tables.
148//
149// Because these tables are read-only (no entries can be added/deleted) at run-time
150// and tend to have large number of entries, we try to minimize the footprint
151// cost per entry.
152//
153// The CompactHashtable is split into two arrays
154//
155// u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset
156// u4 entries[<variable size>]
157//
158// The size of buckets[] is 'num_buckets + 1'. Each entry of
159// buckets[] is a 32-bit encoding of the bucket type and bucket offset,
160// with the type in the left-most 2-bit and offset in the remaining 30-bit.
161// The last entry is a special type. It contains the end of the last
162// bucket.
163//
164// There are two types of buckets, regular buckets and value_only buckets. The
165// value_only buckets have '01' in their highest 2-bit, and regular buckets have
166// '00' in their highest 2-bit.
167//
168// For normal buckets, each entry is 8 bytes in the entries[]:
169// u4 hash; /* symbol/string hash */
170// union {
171// u4 offset; /* Symbol* sym = (Symbol*)(base_address + offset) */
172// narrowOop str; /* String narrowOop encoding */
173// }
174//
175//
176// For value_only buckets, each entry has only the 4-byte 'offset' in the entries[].
177//
178// Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code
179// is skipped.
180// buckets[0, 4, 5, ....]
181// | | |
182// | | +---+
183// | | |
184// | +----+ |
185// v v v
186// entries[H,O,H,O,O,H,O,H,O.....]
187//
188// See CompactHashtable::lookup() for how the table is searched at runtime.
189// See CompactHashtableWriter::dump() for how the table is written at CDS
190// dump time.
191//
192class SimpleCompactHashtable {
193protected:
194 address _base_address;
195 u4 _bucket_count;
196 u4 _entry_count;
197 u4* _buckets;
198 u4* _entries;
199
200public:
201 SimpleCompactHashtable() {
202 _entry_count = 0;
203 _bucket_count = 0;
204 _buckets = 0;
205 _entries = 0;
206 }
207
208 void reset() {
209 _bucket_count = 0;
210 _entry_count = 0;
211 _buckets = 0;
212 _entries = 0;
213 }
214
215 void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries);
216
217 // Read/Write the table's header from/to the CDS archive
218 void serialize_header(SerializeClosure* soc) NOT_CDS_RETURN;
219
220 inline bool empty() {
221 return (_entry_count == 0);
222 }
223
224 static size_t calculate_header_size();
225};
226
227template <
228 typename K,
229 typename V,
230 V (*DECODE)(address base_address, u4 offset),
231 bool (*EQUALS)(V value, K key, int len)
232 >
233class CompactHashtable : public SimpleCompactHashtable {
234 friend class VMStructs;
235
236 V decode(u4 offset) const {
237 return DECODE(_base_address, offset);
238 }
239
240public:
241 // Lookup a value V from the compact table using key K
242 inline V lookup(K key, unsigned int hash, int len) const {
243 if (_entry_count > 0) {
244 int index = hash % _bucket_count;
245 u4 bucket_info = _buckets[index];
246 u4 bucket_offset = BUCKET_OFFSET(bucket_info);
247 int bucket_type = BUCKET_TYPE(bucket_info);
248 u4* entry = _entries + bucket_offset;
249
250 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
251 V value = decode(entry[0]);
252 if (EQUALS(value, key, len)) {
253 return value;
254 }
255 } else {
256 // This is a regular bucket, which has more than one
257 // entries. Each entry is a pair of entry (hash, offset).
258 // Seek until the end of the bucket.
259 u4* entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]);
260 while (entry < entry_max) {
261 unsigned int h = (unsigned int)(entry[0]);
262 if (h == hash) {
263 V value = decode(entry[1]);
264 if (EQUALS(value, key, len)) {
265 return value;
266 }
267 }
268 entry += 2;
269 }
270 }
271 }
272 return NULL;
273 }
274
275 template <class ITER>
276 inline void iterate(ITER* iter) const {
277 for (u4 i = 0; i < _bucket_count; i++) {
278 u4 bucket_info = _buckets[i];
279 u4 bucket_offset = BUCKET_OFFSET(bucket_info);
280 int bucket_type = BUCKET_TYPE(bucket_info);
281 u4* entry = _entries + bucket_offset;
282
283 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
284 iter->do_value(decode(entry[0]));
285 } else {
286 u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]);
287 while (entry < entry_max) {
288 iter->do_value(decode(entry[1]));
289 entry += 2;
290 }
291 }
292 }
293 }
294
295 void print_table_statistics(outputStream* st, const char* name) {
296 st->print_cr("%s statistics:", name);
297 int total_entries = 0;
298 int max_bucket = 0;
299 for (u4 i = 0; i < _bucket_count; i++) {
300 u4 bucket_info = _buckets[i];
301 int bucket_type = BUCKET_TYPE(bucket_info);
302 int bucket_size;
303
304 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
305 bucket_size = 1;
306 } else {
307 bucket_size = (BUCKET_OFFSET(_buckets[i + 1]) - BUCKET_OFFSET(bucket_info)) / 2;
308 }
309 total_entries += bucket_size;
310 if (max_bucket < bucket_size) {
311 max_bucket = bucket_size;
312 }
313 }
314 st->print_cr("Number of buckets : %9d", _bucket_count);
315 st->print_cr("Number of entries : %9d", total_entries);
316 st->print_cr("Maximum bucket size : %9d", max_bucket);
317 }
318};
319
320////////////////////////////////////////////////////////////////////////
321//
322// OffsetCompactHashtable -- This is used to store many types of objects
323// in the CDS archive. On 64-bit platforms, we save space by using a 32-bit
324// offset from the CDS base address.
325
326template <typename V>
327inline V read_value_from_compact_hashtable(address base_address, u4 offset) {
328 return (V)(base_address + offset);
329}
330
331template <
332 typename K,
333 typename V,
334 bool (*EQUALS)(V value, K key, int len)
335 >
336class OffsetCompactHashtable : public CompactHashtable<
337 K, V, read_value_from_compact_hashtable<V>, EQUALS> {
338};
339
340
341////////////////////////////////////////////////////////////////////////
342//
343// Read/Write the contents of a hashtable textual dump (created by
344// SymbolTable::dump and StringTable::dump).
345// Because the dump file may be big (hundred of MB in extreme cases),
346// we use mmap for fast access when reading it.
347//
348class HashtableTextDump {
349 int _fd;
350 const char* _base;
351 const char* _p;
352 const char* _end;
353 const char* _filename;
354 size_t _size;
355 int _prefix_type;
356 int _line_no;
357public:
358 HashtableTextDump(const char* filename);
359 ~HashtableTextDump();
360
361 enum {
362 SymbolPrefix = 1 << 0,
363 StringPrefix = 1 << 1,
364 Unknown = 1 << 2
365 };
366
367 void quit(const char* err, const char* msg);
368
369 inline int remain() {
370 return (int)(_end - _p);
371 }
372 int last_line_no() {
373 return _line_no - 1;
374 }
375
376 void corrupted(const char *p, const char *msg);
377
378 inline void corrupted_if(bool cond, const char *msg) {
379 if (cond) {
380 corrupted(_p, msg);
381 }
382 }
383
384 bool skip_newline();
385 int skip(char must_be_char);
386 void skip_past(char c);
387 void check_version(const char* ver);
388
389 inline void get_num(char delim, int *num) {
390 const char* p = _p;
391 const char* end = _end;
392 u8 n = 0;
393
394 while (p < end) {
395 char c = *p++;
396 if ('0' <= c && c <= '9') {
397 n = n * 10 + (c - '0');
398 if (n > (u8)INT_MAX) {
399 corrupted(_p, "Num overflow");
400 }
401 } else if (c == delim) {
402 _p = p;
403 *num = (int)n;
404 return;
405 } else {
406 // Not [0-9], not 'delim'
407 corrupted(_p, "Unrecognized format");;
408 }
409 }
410
411 corrupted(_end, "Incorrect format");
412 ShouldNotReachHere();
413 }
414
415 void scan_prefix_type();
416 int scan_prefix(int* utf8_length);
417 int scan_string_prefix();
418 int scan_symbol_prefix();
419
420 jchar unescape(const char* from, const char* end, int count);
421 void get_utf8(char* utf8_buffer, int utf8_length);
422 static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
423};
424
425#endif // SHARE_CLASSFILE_COMPACTHASHTABLE_HPP
426