dict.cpp source code [OpenJDK/src/hotspot/share/libadt/dict.cpp]

1	/*
2	* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4	*
5	* This code is free software; you can redistribute it and/or modify it
6	* under the terms of the GNU General Public License version 2 only, as
7	* published by the Free Software Foundation.
8	*
9	* This code is distributed in the hope that it will be useful, but WITHOUT
10	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12	* version 2 for more details (a copy is included in the LICENSE file that
13	* accompanied this code).
14	*
15	* You should have received a copy of the GNU General Public License version
16	* 2 along with this work; if not, write to the Free Software Foundation,
17	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18	*
19	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20	* or visit www.oracle.com if you need additional information or have any
21	* questions.
22	*
23	*/
24
25	#include "precompiled.hpp"
26	#include "libadt/dict.hpp"
27
28	// Dictionaries - An Abstract Data Type
29
30	// %%%%% includes not needed with AVM framework - Ungar
31
32	#include <assert.h>
33
34	//------------------------------data-----------------------------------------
35	// String hash tables
// Size of the shift/sum tables below; hashstr() consumes at most MAXID-1
// characters of a key.
#define MAXID 20
static uint8_t initflag = 0;       // True after 1st initialization of xsum[]
// Shift amount applied to the character at each key position by hashstr().
static const char shft[MAXID] = {1,2,3,4,5,6,7,1,2,3,4,5,6,7,1,2,3,4,5,6};
// Running sums of ((1<<shft[i])+1); filled in by the first Dict constructor
// call and read by hashstr().
static short xsum[MAXID];
40
41	//------------------------------bucket---------------------------------------
42	class bucket : public ResourceObj {
43	public:
44	uint _cnt, _max; // Size of bucket
45	void *_keyvals; // Array of keys and values*
46	};
47
48	//------------------------------Dict-----------------------------------------
49	// The dictionary is kept as a hash table. The hash table size is an even power
50	// of two, for nice modulo operations. Each bucket in the hash table points
51	// to a linear list of key-value pairs; each key & value is just a (void *).
52	// The list starts with a count. A hash lookup finds the list head, then a
53	// simple linear scan finds the key. If the table gets too full, it's
54	// doubled in size; the total amount of EXTRA times all hash functions are
55	// computed for the doubling is no more than the current size - thus the
56	// doubling in size costs no more than a constant factor in speed.
57	Dict::Dict(CmpKey initcmp, Hash inithash) : _arena(Thread::current()->resource_area()),
58	_hash(inithash), _cmp(initcmp) {
59	int i;
60
61	// Precompute table of null character hashes
62	if( !initflag ) { // Not initializated yet?
63	xsum[`0`] = (`1`<<shft[`0`])+`1`; // Initialize
64	for(i=`1`; i<MAXID; i++) {
65	xsum[i] = (`1`<<shft[i])+`1`+xsum[i-`1`];
66	}
67	initflag = `1`; // Never again
68	}
69
70	_size = `16`; // Size is a power of 2
71	_cnt = `0`; // Dictionary is empty
72	_bin = (bucket)_arena->Amalloc_4(sizeof(bucket)_size);
73	memset((void)_bin,`0`,sizeof(bucket)_size);
74	}
75
76	Dict::Dict(CmpKey initcmp, Hash inithash, Arena arena, int* size)
77	: _arena(arena), _hash(inithash), _cmp(initcmp) {
78	int i;
79
80	// Precompute table of null character hashes
81	if( !initflag ) { // Not initializated yet?
82	xsum[`0`] = (`1`<<shft[`0`])+`1`; // Initialize
83	for(i=`1`; i<MAXID; i++) {
84	xsum[i] = (`1`<<shft[i])+`1`+xsum[i-`1`];
85	}
86	initflag = `1`; // Never again
87	}
88
89	i=`16`;
90	while( i < size ) i <<= `1`;
91	_size = i; // Size is a power of 2
92	_cnt = `0`; // Dictionary is empty
93	_bin = (bucket)_arena->Amalloc_4(sizeof(bucket)_size);
94	memset((void)_bin,`0`,sizeof(bucket)_size);
95	}
96
97	//------------------------------~Dict------------------------------------------
98	// Delete an existing dictionary.
99	Dict::~Dict() {
100	/*
101	tty->print("~Dict %d/%d: ",_cnt,_size);
102	for( uint i=0; i < _size; i++) // For complete new table do
103	tty->print("%d ",_bin[i]._cnt);
104	tty->print("\n");/*
105	/for( uint i=0; i<_size; i++ ) {*
106	FREE_FAST( _bin[i]._keyvals );
107	} /*
108	}
109
110	//------------------------------Clear----------------------------------------
111	// Zap to empty; ready for re-use
112	void Dict::Clear() {
113	_cnt = `0`; // Empty contents
114	for( uint i=`0`; i<_size; i++ )
115	_bin[i]._cnt = `0`; // Empty buckets, but leave allocated
116	// Leave _size & _bin alone, under the assumption that dictionary will
117	// grow to this size again.
118	}
119
120	//------------------------------doubhash---------------------------------------
121	// Double hash table size. If can't do so, just suffer. If can, then run
122	// thru old hash table, moving things to new table. Note that since hash
123	// table doubled, exactly 1 new bit is exposed in the mask - so everything
124	// in the old table ends up on 1 of two lists in the new table; a hi and a
125	// lo list depending on the value of the bit.
126	void Dict::doubhash(void) {
127	uint oldsize = _size;
128	_size <<= `1`; // Double in size
129	_bin = (bucket)_arena->Arealloc(_bin, sizeof(bucket) oldsize, sizeof(bucket) * _size);
130	memset((void)(&_bin[oldsize]), `0`, oldsize sizeof(bucket));
131	// Rehash things to spread into new table
132	for (uint i = `0`; i < oldsize; i++) { // For complete OLD table do
133	bucket b = &_bin[i]; // Handy shortcut for _bin[i]*
134	if (!b->_keyvals) continue; // Skip empties fast
135
136	bucket nb = &_bin[i+oldsize]; // New bucket shortcut*
137	uint j = b->_max; // Trim new bucket to nearest power of 2
138	while (j > b->_cnt) { j >>= `1`; } // above old bucket _cnt
139	if (!j) { j = `1`; } // Handle zero-sized buckets
140	nb->_max = j << `1`;
141	// Allocate worst case space for key-value pairs
142	nb->_keyvals = (void)_arena->Amalloc_4(sizeof*(void* ) nb->_max * `2`);
143	uint nbcnt = `0`;
144
145	for (j = `0`; j < b->_cnt; ) { // Rehash all keys in this bucket
146	void *key = b->_keyvals[j + j];
147	if ((_hash(key) & (_size-`1`)) != i) { // Moving to hi bucket?
148	nb->_keyvals[nbcnt + nbcnt] = key;
149	nb->_keyvals[nbcnt + nbcnt + `1`] = b->_keyvals[j + j + `1`];
150	nb->_cnt = nbcnt = nbcnt + `1`;
151	b->_cnt--; // Remove key/value from lo bucket
152	b->_keyvals[j + j] = b->_keyvals[b->_cnt + b->_cnt];
153	b->_keyvals[j + j + `1`] = b->_keyvals[b->_cnt + b->_cnt + `1`];
154	// Don't increment j, hash compacted element also.
155	} else {
156	j++; // Iterate.
157	}
158	} // End of for all key-value pairs in bucket
159	} // End of for all buckets
160	}
161
162	//------------------------------Dict-----------------------------------------
163	// Deep copy a dictionary.
164	Dict::Dict( const Dict &d ) : ResourceObj (d), _arena(d._arena), _size(d._size), _cnt(d._cnt), _hash(d._hash), _cmp(d._cmp) {
165	_bin = (bucket)_arena->Amalloc_4(sizeof(bucket)_size);
166	memcpy( (void)_bin, (void)d._bin, sizeof*(bucket)_size );
167	for( uint i=`0`; i<_size; i++ ) {
168	if( !_bin[i]._keyvals ) continue;
169	_bin[i]._keyvals=(void)_arena->Amalloc_4( sizeof*(void* )_bin[i]._max*`2`);
170	memcpy( _bin[i]._keyvals, d._bin[i]._keyvals,_bin[i]._cnt`2`sizeof(void*));
171	}
172	}
173
174	//------------------------------Dict-----------------------------------------
175	// Deep copy a dictionary.
176	Dict &Dict::operator =( const Dict &d ) {
177	if( _size < d._size ) { // If must have more buckets
178	_arena = d._arena;
179	_bin = (bucket)_arena->Arealloc( _bin, sizeof(bucket)_size, sizeof(bucket)*d._size );
180	memset( (void)(&_bin[_size]), `0`, (d._size-_size)sizeof(bucket) );
181	_size = d._size;
182	}
183	uint i;
184	for( i=`0`; i<_size; i++ ) // All buckets are empty
185	_bin[i]._cnt = `0`; // But leave bucket allocations alone
186	_cnt = d._cnt;
187	(Hash)(&_hash) = d._hash;
188	(CmpKey)(&_cmp) = d._cmp;
189	for( i=`0`; i<_size; i++ ) {
190	bucket b = &d._bin[i]; // Shortcut to source bucket*
191	for( uint j=`0`; j<b->_cnt; j++ )
192	Insert( b->_keyvals[j+j], b->_keyvals[j+j+`1`] );
193	}
194	return *this;
195	}
196
197	//------------------------------Insert----------------------------------------
198	// Insert or replace a key/value pair in the given dictionary. If the
199	// dictionary is too full, its size is doubled. The prior value stored for
200	// the key is returned (NULL if this is a 1st insertion of that key). If the
201	// key is already present, its key-value pair is overwritten in place only
202	// when 'replace' is true; the bucket list is not reordered.
203	void Dict::Insert(void* key, void* val, bool* replace) {
204	uint hash = _hash( key ); // Get hash key
205	uint i = hash & (_size-`1`); // Get hash key, corrected for size
206	bucket b = &_bin[i]; // Handy shortcut*
207	for( uint j=`0`; j<b->_cnt; j++ ) {
208	if( !_cmp(key,b->_keyvals[j+j]) ) {
209	if (!replace) {
210	return b->_keyvals[j+j+`1`];
211	} else {
212	void *prior = b->_keyvals[j+j+`1`];
213	b->_keyvals[j+j ] = key; // Insert current key-value
214	b->_keyvals[j+j+`1`] = val;
215	return prior; // Return prior
216	}
217	}
218	}
219	if( ++_cnt > _size ) { // Hash table is full
220	doubhash(); // Grow whole table if too full
221	i = hash & (_size-`1`); // Rehash
222	b = &_bin[i]; // Handy shortcut
223	}
224	if( b->_cnt == b->_max ) { // Must grow bucket?
225	if( !b->_keyvals ) {
226	b->_max = `2`; // Initial bucket size
227	b->_keyvals = (void)_arena->Amalloc_4(sizeof*(void*) b->_max * `2`);
228	} else {
229	b->_keyvals = (void)_arena->Arealloc(b->_keyvals, sizeof*(void*) b->_max * `2`, sizeof(void) b->_max * `4`);
230	b->_max <<= `1`; // Double bucket
231	}
232	}
233	b->_keyvals[b->_cnt+b->_cnt ] = key;
234	b->_keyvals[b->_cnt+b->_cnt+`1`] = val;
235	b->_cnt++;
236	return NULL; // Nothing found prior
237	}
238
239	//------------------------------Delete---------------------------------------
240	// Find & remove a value from dictionary. Return old value.
241	void Dict::Delete(void* *key) {
242	uint i = _hash( key ) & (_size-`1`); // Get hash key, corrected for size
243	bucket b = &_bin[i]; // Handy shortcut*
244	for( uint j=`0`; j<b->_cnt; j++ )
245	if( !_cmp(key,b->_keyvals[j+j]) ) {
246	void *prior = b->_keyvals[j+j+`1`];
247	b->_cnt--; // Remove key/value from lo bucket
248	b->_keyvals[j+j ] = b->_keyvals[b->_cnt+b->_cnt ];
249	b->_keyvals[j+j+`1`] = b->_keyvals[b->_cnt+b->_cnt+`1`];
250	_cnt--; // One less thing in table
251	return prior;
252	}
253	return NULL;
254	}
255
256	//------------------------------FindDict-------------------------------------
257	// Find a key-value pair in the given dictionary. If not found, return NULL.
258	// If found, return its value; the bucket list is left unchanged.
259	void Dict::operator* [](const void key) const* {
260	uint i = _hash( key ) & (_size-`1`); // Get hash key, corrected for size
261	bucket b = &_bin[i]; // Handy shortcut*
262	for( uint j=`0`; j<b->_cnt; j++ )
263	if( !_cmp(key,b->_keyvals[j+j]) )
264	return b->_keyvals[j+j+`1`];
265	return NULL;
266	}
267
268	//------------------------------CmpDict--------------------------------------
269	// CmpDict compares two dictionaries; they must use the same hash and compare
270	// functions, and every bucket must hold the same key and value pointers in
271	// the same order (raw memcmp). If so 1 is returned, if not 0 is returned.
272	int32_t Dict::operator ==(const Dict &d2) const {
273	if( _cnt != d2._cnt ) return `0`;
274	if( _hash != d2._hash ) return `0`;
275	if( _cmp != d2._cmp ) return `0`;
276	for( uint i=`0`; i < _size; i++) { // For complete hash table do
277	bucket b = &_bin[i]; // Handy shortcut*
278	if( b->_cnt != d2._bin[i]._cnt ) return `0`;
279	if( memcmp(b->_keyvals, d2._bin[i]._keyvals, b->_cnt`2`sizeof(void*) ) )
280	return `0`; // Key-value pairs must match
281	}
282	return `1`; // All match, is OK
283	}
284
285	//------------------------------print------------------------------------------
286	// Handier print routine
287	void Dict::print() {
288	DictI i(this); // Moved definition in iterator here because of g++.
289	tty->print("Dict@" INTPTR_FORMAT "[%d] = {", p2i(this), _cnt);
290	for( ; i.test(); ++i ) {
291	tty->print("(" INTPTR_FORMAT "," INTPTR_FORMAT "),", p2i(i._key), p2i(i._value));
292	}
293	tty->print_cr("}");
294	}
295
296	//------------------------------Hashing Functions----------------------------
297	// Convert string to hash key. This algorithm implements a universal hash
298	// function with the multipliers frozen (ok, so it's not universal). The
299	// multipliers (and allowable characters) are all odd, so the resultant sum
300	// is odd - guaranteed not divisible by any power of two, so the hash tables
301	// can be any power of two with good results. Also, I choose multipliers
302	// that have only 2 bits set (the low is always set to be odd) so
303	// multiplication requires only shifts and adds. Characters are required to
304	// be in the range 0-127 (I double & add 1 to force oddness). Keys are
305	// limited to MAXID characters in length. Experimental evidence on 150K of
306	// C text shows excellent spreading of values for any size hash table.
307	int hashstr(const void *t) {
308	char c, k = `0`;
309	int32_t sum = `0`;
310	const char s = (const* char *)t;
311
312	while( ((c = s++) != `'\0'`) && (k < MAXID-`1`) ) { // Get characters till null or MAXID-1*
313	c = (c<<`1`)+`1`; // Characters are always odd!
314	sum += c + (c<<shft[k++]); // Universal hash function
315	}
316	return (int)((sum+xsum[k]) >> `1`); // Hash key, un-modulo'd table size
317	}
318
319	//------------------------------hashptr--------------------------------------
320	// Slimey cheap hash function; no guaranteed performance. Better than the
321	// default for pointers, especially on MS-DOS machines.
// Hash a pointer: its integer value shifted right by two, truncated to int.
int hashptr(const void *key) {
  return (int)((intptr_t)key >> 2);
}
325
326	// Slimey cheap hash function; no guaranteed performance.
// Hash a key by taking its pointer value, truncated to int.
int hashkey(const void *key) {
  return (int)(intptr_t)key;
}
330
331	//------------------------------Key Comparator Functions---------------------
// Compare two keys as NUL-terminated strings; returns strcmp's result
// (zero when the strings are equal).
int32_t cmpstr(const void *k1, const void *k2) {
  return strcmp((const char *)k1,(const char *)k2);
}
335
336	// Cheap key comparator.
// Compare two keys by pointer value.
// Returns 0 when equal, 1 when key1 is greater than key2 as a signed integer,
// and -1 otherwise.
int32_t cmpkey(const void *key1, const void *key2) {
  if (key1 == key2) return 0;
  // Compare directly: subtracting the two intptr_t values can overflow.
  return ((intptr_t)key1 > (intptr_t)key2) ? 1 : -1;
}
343
344	//=============================================================================
345	//------------------------------reset------------------------------------------
346	// Create an iterator and initialize the first variables.
347	void DictI::reset( const Dict *dict ) {
348	_d = dict; // The dictionary
349	_i = (uint)-`1`; // Before the first bin
350	_j = `0`; // Nothing left in the current bin
351	++(*this); // Step to first real value
352	}
353
354	//------------------------------next-------------------------------------------
355	// Find the next key-value pair in the dictionary, or return a NULL key and
356	// value.
357	void DictI::operator ++(void) {
358	if( _j-- ) { // Still working in current bin?
359	_key = _d->_bin[_i]._keyvals[_j+_j];
360	_value = _d->_bin[_i]._keyvals[_j+_j+`1`];
361	return;
362	}
363
364	while( ++_i < _d->_size ) { // Else scan for non-zero bucket
365	_j = _d->_bin[_i]._cnt;
366	if( !_j ) continue;
367	_j--;
368	_key = _d->_bin[_i]._keyvals[_j+_j];
369	_value = _d->_bin[_i]._keyvals[_j+_j+`1`];
370	return;
371	}
372	_key = _value = NULL;
373	}
374

Browse the source code of OpenJDK/src/hotspot/share/libadt/dict.cpp