compactHashtable.hpp source code [OpenJDK/src/hotspot/share/classfile/compactHashtable.hpp]

1	/*
2	* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4	*
5	* This code is free software; you can redistribute it and/or modify it
6	* under the terms of the GNU General Public License version 2 only, as
7	* published by the Free Software Foundation.
8	*
9	* This code is distributed in the hope that it will be useful, but WITHOUT
10	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12	* version 2 for more details (a copy is included in the LICENSE file that
13	* accompanied this code).
14	*
15	* You should have received a copy of the GNU General Public License version
16	* 2 along with this work; if not, write to the Free Software Foundation,
17	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18	*
19	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20	* or visit www.oracle.com if you need additional information or have any
21	* questions.
22	*
23	*/
24
25	#ifndef SHARE_CLASSFILE_COMPACTHASHTABLE_HPP
26	#define SHARE_CLASSFILE_COMPACTHASHTABLE_HPP
27
28	#include "oops/array.hpp"
29	#include "oops/symbol.hpp"
30	#include "utilities/growableArray.hpp"
31
32
33	template <
34	typename K,
35	typename V,
36	V (*DECODE)(address base_address, u4 offset),
37	bool (EQUALS)(V value, K key, int* len)
38	>
39	class CompactHashtable;
40	class NumberSeq;
41	class SimpleCompactHashtable;
42	class SerializeClosure;
43
44	// Stats for symbol tables in the CDS archive
45	class CompactHashtableStats {
46	public:
47	int hashentry_count;
48	int hashentry_bytes;
49	int bucket_count;
50	int bucket_bytes;
51	};
52
53	#if INCLUDE_CDS
54	/////////////////////////////////////////////////////////////////////////
55	//
56	// The compact hash table writer. Used at dump time for writing out
57	// the compact table to the shared archive.
58	//
59	// At dump time, the CompactHashtableWriter obtains all entries from the
60	// symbol/string table and adds them to a new temporary hash table. The hash
61	// table size (number of buckets) is calculated using
62	// '(num_entries + bucket_size - 1) / bucket_size'. The default bucket
63	// size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.
64	// 4 is chosen because it produces smaller sized bucket on average for
65	// faster lookup. It also has relatively small number of empty buckets and
66	// good distribution of the entries.
67	//
68	// We use a simple hash function (hash % num_bucket) for the table.
69	// The new table is compacted when written out. Please see comments
70	// above the CompactHashtable class for the table layout detail. The bucket
71	// offsets are written to the archive as part of the compact table. The
72	// bucket offset is encoded in the low 30-bit (0-29) and the bucket type
73	// (regular or compact) are encoded in bit[31, 30]. For buckets with more
74	// than one entry, both hash and entry offset are written to the
75	// table. For buckets with only one entry, only the entry offset is written
76	// to the table and the buckets are tagged as compact in their type bits.
77	// Buckets without entry are skipped from the table. Their offsets are
78	// still written out for faster lookup.
79	//
80	class CompactHashtableWriter: public StackObj {
81	public:
82	class Entry {
83	unsigned int _hash;
84	u4 _value;
85
86	public:
87	Entry() {}
88	Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {}
89
90	u4 value() {
91	return _value;
92	}
93	unsigned int hash() {
94	return _hash;
95	}
96
97	bool operator==(const CompactHashtableWriter::Entry& other) {
98	return (_value == other._value && _hash == other._hash);
99	}
100	}; // class CompactHashtableWriter::Entry
101
102	private:
103	int _num_entries_written;
104	int _num_buckets;
105	int _num_empty_buckets;
106	int _num_value_only_buckets;
107	int _num_other_buckets;
108	GrowableArray<Entry>** _buckets;
109	CompactHashtableStats* _stats;
110	Array<u4>* _compact_buckets;
111	Array<u4>* _compact_entries;
112
113	public:
114	// This is called at dump-time only
115	CompactHashtableWriter(int num_entries, CompactHashtableStats* stats);
116	~CompactHashtableWriter();
117
118	void add(unsigned int hash, u4 value);
119
120	private:
121	void allocate_table();
122	void dump_table(NumberSeq* summary);
123	static int calculate_num_buckets(int num_entries) {
124	int num_buckets = num_entries / SharedSymbolTableBucketSize;
125	// calculation of num_buckets can result in zero buckets, we need at least one
126	return (num_buckets < `1`) ? `1` : num_buckets;
127	}
128
129	public:
130	void dump(SimpleCompactHashtable cht, const* char* table_name);
131
132	static size_t estimate_size(int num_entries);
133	};
134	#endif // INCLUDE_CDS
135
136	#define REGULAR_BUCKET_TYPE 0
137	#define VALUE_ONLY_BUCKET_TYPE 1
138	#define TABLEEND_BUCKET_TYPE 3
139	#define BUCKET_OFFSET_MASK 0x3FFFFFFF
140	#define BUCKET_OFFSET(info) ((info) & BUCKET_OFFSET_MASK)
141	#define BUCKET_TYPE_SHIFT 30
142	#define BUCKET_TYPE(info) (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)
143	#define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) \| ((offset) & BUCKET_OFFSET_MASK))
144
145	/////////////////////////////////////////////////////////////////////////////
146	//
147	// CompactHashtable is used to store the CDS archive's symbol/string tables.
148	//
149	// Because these tables are read-only (no entries can be added/deleted) at run-time
150	// and tend to have large number of entries, we try to minimize the footprint
151	// cost per entry.
152	//
153	// The CompactHashtable is split into two arrays
154	//
155	// u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset
156	// u4 entries[<variable size>]
157	//
158	// The size of buckets[] is 'num_buckets + 1'. Each entry of
159	// buckets[] is a 32-bit encoding of the bucket type and bucket offset,
160	// with the type in the left-most 2-bit and offset in the remaining 30-bit.
161	// The last entry is a special type. It contains the end of the last
162	// bucket.
163	//
164	// There are two types of buckets, regular buckets and value_only buckets. The
165	// value_only buckets have '01' in their highest 2-bit, and regular buckets have
166	// '00' in their highest 2-bit.
167	//
168	// For normal buckets, each entry is 8 bytes in the entries[]:
169	// u4 hash; / symbol/string hash /
170	// union {
171	// u4 offset; / Symbol* sym = (Symbol)(base_address + offset) /*
172	// narrowOop str; / String narrowOop encoding /
173	// }
174	//
175	//
176	// For value_only buckets, each entry has only the 4-byte 'offset' in the entries[].
177	//
178	// Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code
179	// is skipped.
180	// buckets[0, 4, 5, ....]
181	// \| \| \|
182	// \| \| +---+
183	// \| \| \|
184	// \| +----+ \|
185	// v v v
186	// entries[H,O,H,O,O,H,O,H,O.....]
187	//
188	// See CompactHashtable::lookup() for how the table is searched at runtime.
189	// See CompactHashtableWriter::dump() for how the table is written at CDS
190	// dump time.
191	//
192	class SimpleCompactHashtable {
193	protected:
194	address _base_address;
195	u4 _bucket_count;
196	u4 _entry_count;
197	u4* _buckets;
198	u4* _entries;
199
200	public:
201	SimpleCompactHashtable() {
202	_entry_count = `0`;
203	_bucket_count = `0`;
204	_buckets = `0`;
205	_entries = `0`;
206	}
207
208	void reset() {
209	_bucket_count = `0`;
210	_entry_count = `0`;
211	_buckets = `0`;
212	_entries = `0`;
213	}
214
215	void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries);
216
217	// Read/Write the table's header from/to the CDS archive
218	void serialize_header(SerializeClosure* soc) NOT_CDS_RETURN;
219
220	inline bool empty() {
221	return (_entry_count == `0`);
222	}
223
224	static size_t calculate_header_size();
225	};
226
227	template <
228	typename K,
229	typename V,
230	V (*DECODE)(address base_address, u4 offset),
231	bool (EQUALS)(V value, K key, int* len)
232	>
233	class CompactHashtable : public SimpleCompactHashtable {
234	friend class VMStructs;
235
236	V decode(u4 offset) const {
237	return DECODE(_base_address, offset);
238	}
239
240	public:
241	// Lookup a value V from the compact table using key K
242	inline V lookup(K key, unsigned int hash, int len) const {
243	if (_entry_count > `0`) {
244	int index = hash % _bucket_count;
245	u4 bucket_info = _buckets[index];
246	u4 bucket_offset = BUCKET_OFFSET(bucket_info);
247	int bucket_type = BUCKET_TYPE(bucket_info);
248	u4* entry = _entries + bucket_offset;
249
250	if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
251	V value = decode(entry[`0`]);
252	if (EQUALS(value, key, len)) {
253	return value;
254	}
255	} else {
256	// This is a regular bucket, which has more than one
257	// entries. Each entry is a pair of entry (hash, offset).
258	// Seek until the end of the bucket.
259	u4* entry_max = _entries + BUCKET_OFFSET(_buckets[index + `1`]);
260	while (entry < entry_max) {
261	unsigned int h = (unsigned int)(entry[`0`]);
262	if (h == hash) {
263	V value = decode(entry[`1`]);
264	if (EQUALS(value, key, len)) {
265	return value;
266	}
267	}
268	entry += `2`;
269	}
270	}
271	}
272	return NULL;
273	}
274
275	template <class ITER>
276	inline void iterate(ITER* iter) const {
277	for (u4 i = `0`; i < _bucket_count; i++) {
278	u4 bucket_info = _buckets[i];
279	u4 bucket_offset = BUCKET_OFFSET(bucket_info);
280	int bucket_type = BUCKET_TYPE(bucket_info);
281	u4* entry = _entries + bucket_offset;
282
283	if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
284	iter->do_value(decode(entry[`0`]));
285	} else {
286	u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + `1`]);
287	while (entry < entry_max) {
288	iter->do_value(decode(entry[`1`]));
289	entry += `2`;
290	}
291	}
292	}
293	}
294
295	void print_table_statistics(outputStream* st, const char* name) {
296	st->print_cr("%s statistics:", name);
297	int total_entries = `0`;
298	int max_bucket = `0`;
299	for (u4 i = `0`; i < _bucket_count; i++) {
300	u4 bucket_info = _buckets[i];
301	int bucket_type = BUCKET_TYPE(bucket_info);
302	int bucket_size;
303
304	if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
305	bucket_size = `1`;
306	} else {
307	bucket_size = (BUCKET_OFFSET(_buckets[i + `1`]) - BUCKET_OFFSET(bucket_info)) / `2`;
308	}
309	total_entries += bucket_size;
310	if (max_bucket < bucket_size) {
311	max_bucket = bucket_size;
312	}
313	}
314	st->print_cr("Number of buckets : %9d", _bucket_count);
315	st->print_cr("Number of entries : %9d", total_entries);
316	st->print_cr("Maximum bucket size : %9d", max_bucket);
317	}
318	};
319
320	////////////////////////////////////////////////////////////////////////
321	//
322	// OffsetCompactHashtable -- This is used to store many types of objects
323	// in the CDS archive. On 64-bit platforms, we save space by using a 32-bit
324	// offset from the CDS base address.
325
326	template <typename V>
327	inline V read_value_from_compact_hashtable(address base_address, u4 offset) {
328	return (V)(base_address + offset);
329	}
330
331	template <
332	typename K,
333	typename V,
334	bool (EQUALS)(V value, K key, int* len)
335	>
336	class OffsetCompactHashtable : public CompactHashtable<
337	K, V, read_value_from_compact_hashtable<V>, EQUALS> {
338	};
339
340
341	////////////////////////////////////////////////////////////////////////
342	//
343	// Read/Write the contents of a hashtable textual dump (created by
344	// SymbolTable::dump and StringTable::dump).
345	// Because the dump file may be big (hundred of MB in extreme cases),
346	// we use mmap for fast access when reading it.
347	//
348	class HashtableTextDump {
349	int _fd;
350	const char* _base;
351	const char* _p;
352	const char* _end;
353	const char* _filename;
354	size_t _size;
355	int _prefix_type;
356	int _line_no;
357	public:
358	HashtableTextDump(const char* filename);
359	~HashtableTextDump();
360
361	enum {
362	SymbolPrefix = `1` << `0`,
363	StringPrefix = `1` << `1`,
364	Unknown = `1` << `2`
365	};
366
367	void quit(const char* err, const char* msg);
368
369	inline int remain() {
370	return (int)(_end - _p);
371	}
372	int last_line_no() {
373	return _line_no - `1`;
374	}
375
376	void corrupted(const char p, const* char *msg);
377
378	inline void corrupted_if(bool cond, const char *msg) {
379	if (cond) {
380	corrupted(_p, msg);
381	}
382	}
383
384	bool skip_newline();
385	int skip(char must_be_char);
386	void skip_past(char c);
387	void check_version(const char* ver);
388
389	inline void get_num(char delim, int *num) {
390	const char* p = _p;
391	const char* end = _end;
392	u8 n = `0`;
393
394	while (p < end) {
395	char c = *p++;
396	if (`'0'` <= c && c <= `'9'`) {
397	n = n * `10` + (c - `'0'`);
398	if (n > (u8)INT_MAX) {
399	corrupted(_p, "Num overflow");
400	}
401	} else if (c == delim) {
402	_p = p;
403	num = (int*)n;
404	return;
405	} else {
406	// Not [0-9], not 'delim'
407	corrupted(_p, "Unrecognized format");;
408	}
409	}
410
411	corrupted(_end, "Incorrect format");
412	ShouldNotReachHere();
413	}
414
415	void scan_prefix_type();
416	int scan_prefix(int* utf8_length);
417	int scan_string_prefix();
418	int scan_symbol_prefix();
419
420	jchar unescape(const char* from, const char* end, int count);
421	void get_utf8(char* utf8_buffer, int utf8_length);
422	static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
423	};
424
425	#endif // SHARE_CLASSFILE_COMPACTHASHTABLE_HPP
426

Browse the source code of OpenJDK/src/hotspot/share/classfile/compactHashtable.hpp