uhash.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/uhash.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	******************************************************************************
5	* Copyright (C) 1997-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	******************************************************************************
8	* Date Name Description
9	* 03/22/00 aliu Adapted from original C++ ICU Hashtable.
10	* 07/06/01 aliu Modified to support int32_t keys on
11	* platforms with sizeof(void*) < 32.
12	******************************************************************************
13	*/
14
15	#include "uhash.h"
16	#include "unicode/ustring.h"
17	#include "cstring.h"
18	#include "cmemory.h"
19	#include "uassert.h"
20	#include "ustr_imp.h"
21
22	/ This hashtable is implemented as a double hash. All elements are*
23	* stored in a single array with no secondary storage for collision
24	* resolution (no linked list, etc.). When there is a hash collision
25	* (when two unequal keys have the same hashcode) we resolve this by
26	* using a secondary hash. The secondary hash is an increment
27	* computed as a hash function (a different one) of the primary
28	* hashcode. This increment is added to the initial hash value to
29	* obtain further slots assigned to the same hash code. For this to
30	* work, the length of the array and the increment must be relatively
31	* prime. The easiest way to achieve this is to have the length of
32	* the array be prime, and the increment be any value from
33	* 1..length-1.
34	*
35	* Hashcodes are 32-bit integers. We make sure all hashcodes are
36	* non-negative by masking off the top bit. This has two effects: (1)
37	* modulo arithmetic is simplified. If we allowed negative hashcodes,
38	* then when we computed hashcode % length, we could get a negative
39	* result, which we would then have to adjust back into range. It's
40	* simpler to just make hashcodes non-negative. (2) It makes it easy
41	* to check for empty vs. occupied slots in the table. We just mark
42	* empty or deleted slots with a negative hashcode.
43	*
44	* The central function is _uhash_find(). This function looks for a
45	* slot matching the given key and hashcode. If one is found, it
46	* returns a pointer to that slot. If the table is full, and no match
47	* is found, it returns NULL -- in theory. This would make the code
48	* more complicated, since all callers of _uhash_find() would then
49	* have to check for a NULL result. To keep this from happening, we
50	* don't allow the table to fill. When there is only one
51	* empty/deleted slot left, uhash_put() will refuse to increase the
52	* count, and fail. This simplifies the code. In practice, one will
53	* seldom encounter this using default UHashtables. However, if a
54	* hashtable is set to a U_FIXED resize policy, or if memory is
55	* exhausted, then the table may fill.
56	*
57	* High and low water ratios control rehashing. They establish levels
58	* of fullness (from 0 to 1) outside of which the data array is
59	* reallocated and repopulated. Setting the low water ratio to zero
60	* means the table will never shrink. Setting the high water ratio to
61	* one means the table will never grow. The ratios should be
62	* coordinated with the ratio between successive elements of the
63	* PRIMES table, so that when the primeIndex is incremented or
64	* decremented during rehashing, it brings the ratio of count / length
65	* back into the desired range (between low and high water ratios).
66	*/
67
68	/********************************************************************
69	* PRIVATE Constants, Macros
70	********************************************************************/
71
72	/ This is a list of non-consecutive primes chosen such that*
73	* PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81
74	* to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this
75	* ratio is changed, the low and high water ratios should also be
76	* adjusted to suit.
77	*
78	* These prime numbers were also chosen so that they are the largest
79	* prime number while being less than a power of two.
80	*/
81	static const int32_t PRIMES[] = {
82	`7`, `13`, `31`, `61`, `127`, `251`, `509`, `1021`, `2039`, `4093`, `8191`, `16381`, `32749`,
83	`65521`, `131071`, `262139`, `524287`, `1048573`, `2097143`, `4194301`, `8388593`,
84	`16777213`, `33554393`, `67108859`, `134217689`, `268435399`, `536870909`,
85	`1073741789`, `2147483647` /, 4294967291 /
86	};
87
88	#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
89	#define DEFAULT_PRIME_INDEX 4
90
91	/ These ratios are tuned to the PRIMES array such that a resize*
92	* places the table back into the zone of non-resizing. That is,
93	* after a call to _uhash_rehash(), a subsequent call to
94	* _uhash_rehash() should do nothing (should not churn). This is only
95	* a potential problem with U_GROW_AND_SHRINK.
96	*/
97	static const float RESIZE_POLICY_RATIO_TABLE[`6`] = {
98	/ low, high water ratio /
99	`0.0F`, `0.5F`, / U_GROW: Grow on demand, do not shrink /
100	`0.1F`, `0.5F`, / U_GROW_AND_SHRINK: Grow and shrink on demand /
101	`0.0F`, `1.0F` / U_FIXED: Never change size /
102	};
103
104	/*
105	Invariants for hashcode values:
106
107	* DELETED < 0
108	* EMPTY < 0
109	* Real hashes >= 0
110
111	Hashcodes may not start out this way, but internally they are
112	adjusted so that they are always positive. We assume 32-bit
113	hashcodes; adjust these constants for other hashcode sizes.
114	*/
115	#define HASH_DELETED ((int32_t) 0x80000000)
116	#define HASH_EMPTY ((int32_t) HASH_DELETED + 1)
117
118	#define IS_EMPTY_OR_DELETED(x) ((x) < 0)
119
120	/ This macro expects a UHashTok.pointer as its keypointer and*
121	valuepointer parameters /*
122	#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) UPRV_BLOCK_MACRO_BEGIN { \
123	if (hash->keyDeleter != NULL && keypointer != NULL) { \
124	(*hash->keyDeleter)(keypointer); \
125	} \
126	if (hash->valueDeleter != NULL && valuepointer != NULL) { \
127	(*hash->valueDeleter)(valuepointer); \
128	} \
129	} UPRV_BLOCK_MACRO_END
130
131	/*
132	* Constants for hinting whether a key or value is an integer
133	* or a pointer. If a hint bit is zero, then the associated
134	* token is assumed to be an integer.
135	*/
136	#define HINT_KEY_POINTER (1)
137	#define HINT_VALUE_POINTER (2)
138
139	/********************************************************************
140	* PRIVATE Implementation
141	********************************************************************/
142
143	static UHashTok
144	_uhash_setElement(UHashtable hash, UHashElement e,
145	int32_t hashcode,
146	UHashTok key, UHashTok value, int8_t hint) {
147
148	UHashTok oldValue = e->value;
149	if (hash->keyDeleter != NULL && e->key.pointer != NULL &&
150	e->key.pointer != key.pointer) { / Avoid double deletion /
151	(*hash->keyDeleter)(e->key.pointer);
152	}
153	if (hash->valueDeleter != NULL) {
154	if (oldValue.pointer != NULL &&
155	oldValue.pointer != value.pointer) { / Avoid double deletion /
156	(*hash->valueDeleter)(oldValue.pointer);
157	}
158	oldValue.pointer = NULL;
159	}
160	/ Compilers should copy the UHashTok union correctly, but even if*
161	* they do, memory heap tools (e.g. BoundsChecker) can get
162	* confused when a pointer is cloaked in a union and then copied.
163	* TO ALLEVIATE THIS, we use hints (based on what API the user is
164	* calling) to copy pointers when we know the user thinks
165	* something is a pointer. */
166	if (hint & HINT_KEY_POINTER) {
167	e->key.pointer = key.pointer;
168	} else {
169	e->key = key;
170	}
171	if (hint & HINT_VALUE_POINTER) {
172	e->value.pointer = value.pointer;
173	} else {
174	e->value = value;
175	}
176	e->hashcode = hashcode;
177	return oldValue;
178	}
179
180	/**
181	* Assumes that the given element is not empty or deleted.
182	*/
183	static UHashTok
184	_uhash_internalRemoveElement(UHashtable hash, UHashElement e) {
185	UHashTok empty;
186	U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode));
187	--hash->count;
188	empty.pointer = NULL; empty.integer = `0`;
189	return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, `0`);
190	}
191
192	static void
193	_uhash_internalSetResizePolicy(UHashtable hash, enum* UHashResizePolicy policy) {
194	U_ASSERT(hash != NULL);
195	U_ASSERT(((int32_t)policy) >= `0`);
196	U_ASSERT(((int32_t)policy) < `3`);
197	hash->lowWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * `2`];
198	hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * `2` + `1`];
199	}
200
201	/**
202	* Allocate internal data array of a size determined by the given
203	* prime index. If the index is out of range it is pinned into range.
204	* If the allocation fails the status is set to
205	* U_MEMORY_ALLOCATION_ERROR and all array storage is freed. In
206	* either case the previous array pointer is overwritten.
207	*
208	* Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1.
209	*/
210	static void
211	_uhash_allocate(UHashtable *hash,
212	int32_t primeIndex,
213	UErrorCode *status) {
214
215	UHashElement p, limit;
216	UHashTok emptytok;
217
218	if (U_FAILURE(status)) return*;
219
220	U_ASSERT(primeIndex >= `0` && primeIndex < PRIMES_LENGTH);
221
222	hash->primeIndex = static_cast<int8_t>(primeIndex);
223	hash->length = PRIMES[primeIndex];
224
225	p = hash->elements = (UHashElement*)
226	uprv_malloc(sizeof(UHashElement) * hash->length);
227
228	if (hash->elements == NULL) {
229	*status = U_MEMORY_ALLOCATION_ERROR;
230	return;
231	}
232
233	emptytok.pointer = NULL; / Only one of these two is needed /
234	emptytok.integer = `0`; / but we don't know which one. /
235
236	limit = p + hash->length;
237	while (p < limit) {
238	p->key = emptytok;
239	p->value = emptytok;
240	p->hashcode = HASH_EMPTY;
241	++p;
242	}
243
244	hash->count = `0`;
245	hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
246	hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
247	}
248
249	static UHashtable*
250	_uhash_init(UHashtable *result,
251	UHashFunction *keyHash,
252	UKeyComparator *keyComp,
253	UValueComparator *valueComp,
254	int32_t primeIndex,
255	UErrorCode *status)
256	{
257	if (U_FAILURE(status)) return* NULL;
258	U_ASSERT(keyHash != NULL);
259	U_ASSERT(keyComp != NULL);
260
261	result->keyHasher = keyHash;
262	result->keyComparator = keyComp;
263	result->valueComparator = valueComp;
264	result->keyDeleter = NULL;
265	result->valueDeleter = NULL;
266	result->allocated = FALSE;
267	_uhash_internalSetResizePolicy(result, U_GROW);
268
269	_uhash_allocate(result, primeIndex, status);
270
271	if (U_FAILURE(*status)) {
272	return NULL;
273	}
274
275	return result;
276	}
277
278	static UHashtable*
279	_uhash_create(UHashFunction *keyHash,
280	UKeyComparator *keyComp,
281	UValueComparator *valueComp,
282	int32_t primeIndex,
283	UErrorCode *status) {
284	UHashtable *result;
285
286	if (U_FAILURE(status)) return* NULL;
287
288	result = (UHashtable) uprv_malloc(sizeof*(UHashtable));
289	if (result == NULL) {
290	*status = U_MEMORY_ALLOCATION_ERROR;
291	return NULL;
292	}
293
294	_uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status);
295	result->allocated = TRUE;
296
297	if (U_FAILURE(*status)) {
298	uprv_free(result);
299	return NULL;
300	}
301
302	return result;
303	}
304
305	/**
306	* Look for a key in the table, or if no such key exists, the first
307	* empty slot matching the given hashcode. Keys are compared using
308	* the keyComparator function.
309	*
310	* First find the start position, which is the hashcode modulo
311	* the length. Test it to see if it is:
312	*
313	* a. identical: First check the hash values for a quick check,
314	* then compare keys for equality using keyComparator.
315	* b. deleted
316	* c. empty
317	*
318	* Stop if it is identical or empty, otherwise continue by adding a
319	* "jump" value (moduloing by the length again to keep it within
320	* range) and retesting. For efficiency, there need enough empty
321	* values so that the searchs stop within a reasonable amount of time.
322	* This can be changed by changing the high/low water marks.
323	*
324	* In theory, this function can return NULL, if it is full (no empty
325	* or deleted slots) and if no matching key is found. In practice, we
326	* prevent this elsewhere (in uhash_put) by making sure the last slot
327	* in the table is never filled.
328	*
329	* The size of the table should be prime for this algorithm to work;
330	* otherwise we are not guaranteed that the jump value (the secondary
331	* hash) is relatively prime to the table length.
332	*/
333	static UHashElement*
334	_uhash_find(const UHashtable *hash, UHashTok key,
335	int32_t hashcode) {
336
337	int32_t firstDeleted = -`1`; / assume invalid index /
338	int32_t theIndex, startIndex;
339	int32_t jump = `0`; / lazy evaluate /
340	int32_t tableHash;
341	UHashElement *elements = hash->elements;
342
343	hashcode &= `0x7FFFFFFF`; / must be positive /
344	startIndex = theIndex = (hashcode ^ `0x4000000`) % hash->length;
345
346	do {
347	tableHash = elements[theIndex].hashcode;
348	if (tableHash == hashcode) { / quick check /
349	if ((*hash->keyComparator)(key, elements[theIndex].key)) {
350	return &(elements[theIndex]);
351	}
352	} else if (!IS_EMPTY_OR_DELETED(tableHash)) {
353	/ We have hit a slot which contains a key-value pair,*
354	* but for which the hash code does not match. Keep
355	* looking.
356	*/
357	} else if (tableHash == HASH_EMPTY) { / empty, end o' the line /
358	break;
359	} else if (firstDeleted < `0`) { / remember first deleted /
360	firstDeleted = theIndex;
361	}
362	if (jump == `0`) { / lazy compute jump /
363	/ The jump value must be relatively prime to the table*
364	* length. As long as the length is prime, then any value
365	* 1..length-1 will be relatively prime to it.
366	*/
367	jump = (hashcode % (hash->length - `1`)) + `1`;
368	}
369	theIndex = (theIndex + jump) % hash->length;
370	} while (theIndex != startIndex);
371
372	if (firstDeleted >= `0`) {
373	theIndex = firstDeleted; / reset if had deleted slot /
374	} else if (tableHash != HASH_EMPTY) {
375	/ We get to this point if the hashtable is full (no empty or*
376	* deleted slots), and we've failed to find a match. THIS
377	* WILL NEVER HAPPEN as long as uhash_put() makes sure that
378	* count is always < length.
379	*/
380	UPRV_UNREACHABLE;
381	}
382	return &(elements[theIndex]);
383	}
384
385	/**
386	* Attempt to grow or shrink the data arrays in order to make the
387	* count fit between the high and low water marks. hash_put() and
388	* hash_remove() call this method when the count exceeds the high or
389	* low water marks. This method may do nothing, if memory allocation
390	* fails, or if the count is already in range, or if the length is
391	* already at the low or high limit. In any case, upon return the
392	* arrays will be valid.
393	*/
394	static void
395	_uhash_rehash(UHashtable hash, UErrorCode status) {
396
397	UHashElement *old = hash->elements;
398	int32_t oldLength = hash->length;
399	int32_t newPrimeIndex = hash->primeIndex;
400	int32_t i;
401
402	if (hash->count > hash->highWaterMark) {
403	if (++newPrimeIndex >= PRIMES_LENGTH) {
404	return;
405	}
406	} else if (hash->count < hash->lowWaterMark) {
407	if (--newPrimeIndex < `0`) {
408	return;
409	}
410	} else {
411	return;
412	}
413
414	_uhash_allocate(hash, newPrimeIndex, status);
415
416	if (U_FAILURE(*status)) {
417	hash->elements = old;
418	hash->length = oldLength;
419	return;
420	}
421
422	for (i = oldLength - `1`; i >= `0`; --i) {
423	if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) {
424	UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode);
425	U_ASSERT(e != NULL);
426	U_ASSERT(e->hashcode == HASH_EMPTY);
427	e->key = old[i].key;
428	e->value = old[i].value;
429	e->hashcode = old[i].hashcode;
430	++hash->count;
431	}
432	}
433
434	uprv_free(old);
435	}
436
437	static UHashTok
438	_uhash_remove(UHashtable *hash,
439	UHashTok key) {
440	/ First find the position of the key in the table. If the object*
441	* has not been removed already, remove it. If the user wanted
442	* keys deleted, then delete it also. We have to put a special
443	* hashcode in that position that means that something has been
444	* deleted, since when we do a find, we have to continue PAST any
445	* deleted values.
446	*/
447	UHashTok result;
448	UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key));
449	U_ASSERT(e != NULL);
450	result.pointer = NULL;
451	result.integer = `0`;
452	if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
453	result = _uhash_internalRemoveElement(hash, e);
454	if (hash->count < hash->lowWaterMark) {
455	UErrorCode status = U_ZERO_ERROR;
456	_uhash_rehash(hash, &status);
457	}
458	}
459	return result;
460	}
461
462	static UHashTok
463	_uhash_put(UHashtable *hash,
464	UHashTok key,
465	UHashTok value,
466	int8_t hint,
467	UErrorCode *status) {
468
469	/ Put finds the position in the table for the new value. If the*
470	* key is already in the table, it is deleted, if there is a
471	* non-NULL keyDeleter. Then the key, the hash and the value are
472	* all put at the position in their respective arrays.
473	*/
474	int32_t hashcode;
475	UHashElement* e;
476	UHashTok emptytok;
477
478	if (U_FAILURE(*status)) {
479	goto err;
480	}
481	U_ASSERT(hash != NULL);
482	/ Cannot always check pointer here or iSeries sees NULL every time. /
483	if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) {
484	/ Disallow storage of NULL values, since NULL is returned by*
485	* get() to indicate an absent key. Storing NULL == removing.
486	*/
487	return _uhash_remove(hash, key);
488	}
489	if (hash->count > hash->highWaterMark) {
490	_uhash_rehash(hash, status);
491	if (U_FAILURE(*status)) {
492	goto err;
493	}
494	}
495
496	hashcode = (*hash->keyHasher)(key);
497	e = _uhash_find(hash, key, hashcode);
498	U_ASSERT(e != NULL);
499
500	if (IS_EMPTY_OR_DELETED(e->hashcode)) {
501	/ Important: We must never actually fill the table up. If we*
502	* do so, then _uhash_find() will return NULL, and we'll have
503	* to check for NULL after every call to _uhash_find(). To
504	* avoid this we make sure there is always at least one empty
505	* or deleted slot in the table. This only is a problem if we
506	* are out of memory and rehash isn't working.
507	*/
508	++hash->count;
509	if (hash->count == hash->length) {
510	/ Don't allow count to reach length /
511	--hash->count;
512	*status = U_MEMORY_ALLOCATION_ERROR;
513	goto err;
514	}
515	}
516
517	/ We must in all cases handle storage properly. If there was an*
518	* old key, then it must be deleted (if the deleter != NULL).
519	* Make hashcodes stored in table positive.
520	*/
521	return _uhash_setElement(hash, e, hashcode & `0x7FFFFFFF`, key, value, hint);
522
523	err:
524	/ If the deleters are non-NULL, this method adopts its key and/or*
525	* value arguments, and we must be sure to delete the key and/or
526	* value in all cases, even upon failure.
527	*/
528	HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer);
529	emptytok.pointer = NULL; emptytok.integer = `0`;
530	return emptytok;
531	}
532
533
534	/********************************************************************
535	* PUBLIC API
536	********************************************************************/
537
538	U_CAPI UHashtable* U_EXPORT2
539	uhash_open(UHashFunction *keyHash,
540	UKeyComparator *keyComp,
541	UValueComparator *valueComp,
542	UErrorCode *status) {
543
544	return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
545	}
546
547	U_CAPI UHashtable* U_EXPORT2
548	uhash_openSize(UHashFunction *keyHash,
549	UKeyComparator *keyComp,
550	UValueComparator *valueComp,
551	int32_t size,
552	UErrorCode *status) {
553
554	/ Find the smallest index i for which PRIMES[i] >= size. /
555	int32_t i = `0`;
556	while (i<(PRIMES_LENGTH-`1`) && PRIMES[i]<size) {
557	++i;
558	}
559
560	return _uhash_create(keyHash, keyComp, valueComp, i, status);
561	}
562
563	U_CAPI UHashtable* U_EXPORT2
564	uhash_init(UHashtable *fillinResult,
565	UHashFunction *keyHash,
566	UKeyComparator *keyComp,
567	UValueComparator *valueComp,
568	UErrorCode *status) {
569
570	return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
571	}
572
573	U_CAPI UHashtable* U_EXPORT2
574	uhash_initSize(UHashtable *fillinResult,
575	UHashFunction *keyHash,
576	UKeyComparator *keyComp,
577	UValueComparator *valueComp,
578	int32_t size,
579	UErrorCode *status) {
580
581	// Find the smallest index i for which PRIMES[i] >= size.
582	int32_t i = `0`;
583	while (i<(PRIMES_LENGTH-`1`) && PRIMES[i]<size) {
584	++i;
585	}
586	return _uhash_init(fillinResult, keyHash, keyComp, valueComp, i, status);
587	}
588
589	U_CAPI void U_EXPORT2
590	uhash_close(UHashtable *hash) {
591	if (hash == NULL) {
592	return;
593	}
594	if (hash->elements != NULL) {
595	if (hash->keyDeleter != NULL \|\| hash->valueDeleter != NULL) {
596	int32_t pos=UHASH_FIRST;
597	UHashElement *e;
598	while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) {
599	HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer);
600	}
601	}
602	uprv_free(hash->elements);
603	hash->elements = NULL;
604	}
605	if (hash->allocated) {
606	uprv_free(hash);
607	}
608	}
609
610	U_CAPI UHashFunction *U_EXPORT2
611	uhash_setKeyHasher(UHashtable hash, UHashFunction fn) {
612	UHashFunction *result = hash->keyHasher;
613	hash->keyHasher = fn;
614	return result;
615	}
616
617	U_CAPI UKeyComparator *U_EXPORT2
618	uhash_setKeyComparator(UHashtable hash, UKeyComparator fn) {
619	UKeyComparator *result = hash->keyComparator;
620	hash->keyComparator = fn;
621	return result;
622	}
623	U_CAPI UValueComparator *U_EXPORT2
624	uhash_setValueComparator(UHashtable hash, UValueComparator fn){
625	UValueComparator *result = hash->valueComparator;
626	hash->valueComparator = fn;
627	return result;
628	}
629
630	U_CAPI UObjectDeleter *U_EXPORT2
631	uhash_setKeyDeleter(UHashtable hash, UObjectDeleter fn) {
632	UObjectDeleter *result = hash->keyDeleter;
633	hash->keyDeleter = fn;
634	return result;
635	}
636
637	U_CAPI UObjectDeleter *U_EXPORT2
638	uhash_setValueDeleter(UHashtable hash, UObjectDeleter fn) {
639	UObjectDeleter *result = hash->valueDeleter;
640	hash->valueDeleter = fn;
641	return result;
642	}
643
644	U_CAPI void U_EXPORT2
645	uhash_setResizePolicy(UHashtable hash, enum* UHashResizePolicy policy) {
646	UErrorCode status = U_ZERO_ERROR;
647	_uhash_internalSetResizePolicy(hash, policy);
648	hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
649	hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
650	_uhash_rehash(hash, &status);
651	}
652
653	U_CAPI int32_t U_EXPORT2
654	uhash_count(const UHashtable *hash) {
655	return hash->count;
656	}
657
658	U_CAPI void* U_EXPORT2
659	uhash_get(const UHashtable *hash,
660	const void* key) {
661	UHashTok keyholder;
662	keyholder.pointer = (void*) key;
663	return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
664	}
665
666	U_CAPI void* U_EXPORT2
667	uhash_iget(const UHashtable *hash,
668	int32_t key) {
669	UHashTok keyholder;
670	keyholder.integer = key;
671	return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
672	}
673
674	U_CAPI int32_t U_EXPORT2
675	uhash_geti(const UHashtable *hash,
676	const void* key) {
677	UHashTok keyholder;
678	keyholder.pointer = (void*) key;
679	return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
680	}
681
682	U_CAPI int32_t U_EXPORT2
683	uhash_igeti(const UHashtable *hash,
684	int32_t key) {
685	UHashTok keyholder;
686	keyholder.integer = key;
687	return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
688	}
689
690	U_CAPI void* U_EXPORT2
691	uhash_put(UHashtable *hash,
692	void* key,
693	void* value,
694	UErrorCode *status) {
695	UHashTok keyholder, valueholder;
696	keyholder.pointer = key;
697	valueholder.pointer = value;
698	return _uhash_put(hash, keyholder, valueholder,
699	HINT_KEY_POINTER \| HINT_VALUE_POINTER,
700	status).pointer;
701	}
702
703	U_CAPI void* U_EXPORT2
704	uhash_iput(UHashtable *hash,
705	int32_t key,
706	void* value,
707	UErrorCode *status) {
708	UHashTok keyholder, valueholder;
709	keyholder.integer = key;
710	valueholder.pointer = value;
711	return _uhash_put(hash, keyholder, valueholder,
712	HINT_VALUE_POINTER,
713	status).pointer;
714	}
715
716	U_CAPI int32_t U_EXPORT2
717	uhash_puti(UHashtable *hash,
718	void* key,
719	int32_t value,
720	UErrorCode *status) {
721	UHashTok keyholder, valueholder;
722	keyholder.pointer = key;
723	valueholder.integer = value;
724	return _uhash_put(hash, keyholder, valueholder,
725	HINT_KEY_POINTER,
726	status).integer;
727	}
728
729
730	U_CAPI int32_t U_EXPORT2
731	uhash_iputi(UHashtable *hash,
732	int32_t key,
733	int32_t value,
734	UErrorCode *status) {
735	UHashTok keyholder, valueholder;
736	keyholder.integer = key;
737	valueholder.integer = value;
738	return _uhash_put(hash, keyholder, valueholder,
739	`0`, / neither is a ptr /
740	status).integer;
741	}
742
743	U_CAPI void* U_EXPORT2
744	uhash_remove(UHashtable *hash,
745	const void* key) {
746	UHashTok keyholder;
747	keyholder.pointer = (void*) key;
748	return _uhash_remove(hash, keyholder).pointer;
749	}
750
751	U_CAPI void* U_EXPORT2
752	uhash_iremove(UHashtable *hash,
753	int32_t key) {
754	UHashTok keyholder;
755	keyholder.integer = key;
756	return _uhash_remove(hash, keyholder).pointer;
757	}
758
759	U_CAPI int32_t U_EXPORT2
760	uhash_removei(UHashtable *hash,
761	const void* key) {
762	UHashTok keyholder;
763	keyholder.pointer = (void*) key;
764	return _uhash_remove(hash, keyholder).integer;
765	}
766
767	U_CAPI int32_t U_EXPORT2
768	uhash_iremovei(UHashtable *hash,
769	int32_t key) {
770	UHashTok keyholder;
771	keyholder.integer = key;
772	return _uhash_remove(hash, keyholder).integer;
773	}
774
775	U_CAPI void U_EXPORT2
776	uhash_removeAll(UHashtable *hash) {
777	int32_t pos = UHASH_FIRST;
778	const UHashElement *e;
779	U_ASSERT(hash != NULL);
780	if (hash->count != `0`) {
781	while ((e = uhash_nextElement(hash, &pos)) != NULL) {
782	uhash_removeElement(hash, e);
783	}
784	}
785	U_ASSERT(hash->count == `0`);
786	}
787
788	U_CAPI const UHashElement* U_EXPORT2
789	uhash_find(const UHashtable hash, const* void* key) {
790	UHashTok keyholder;
791	const UHashElement *e;
792	keyholder.pointer = (void*) key;
793	e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
794	return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e;
795	}
796
797	U_CAPI const UHashElement* U_EXPORT2
798	uhash_nextElement(const UHashtable hash, int32_t pos) {
799	/ Walk through the array until we find an element that is not*
800	* EMPTY and not DELETED.
801	*/
802	int32_t i;
803	U_ASSERT(hash != NULL);
804	for (i = *pos + `1`; i < hash->length; ++i) {
805	if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) {
806	*pos = i;
807	return &(hash->elements[i]);
808	}
809	}
810
811	/ No more elements /
812	return NULL;
813	}
814
815	U_CAPI void* U_EXPORT2
816	uhash_removeElement(UHashtable hash, const* UHashElement* e) {
817	U_ASSERT(hash != NULL);
818	U_ASSERT(e != NULL);
819	if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
820	UHashElement nce = (UHashElement )e;
821	return _uhash_internalRemoveElement(hash, nce).pointer;
822	}
823	return NULL;
824	}
825
826	/********************************************************************
827	* UHashTok convenience
828	********************************************************************/
829
830	/**
831	* Return a UHashTok for an integer.
832	*/
833	/U_CAPI UHashTok U_EXPORT2*
834	uhash_toki(int32_t i) {
835	UHashTok tok;
836	tok.integer = i;
837	return tok;
838	}/*
839
840	/**
841	* Return a UHashTok for a pointer.
842	*/
843	/U_CAPI UHashTok U_EXPORT2*
844	uhash_tokp(void p) {*
845	UHashTok tok;
846	tok.pointer = p;
847	return tok;
848	}/*
849
850	/********************************************************************
851	* PUBLIC Key Hash Functions
852	********************************************************************/
853
854	U_CAPI int32_t U_EXPORT2
855	uhash_hashUChars(const UHashTok key) {
856	const UChar s = (const* UChar *)key.pointer;
857	return s == NULL ? `0` : ustr_hashUCharsN(s, u_strlen(s));
858	}
859
860	U_CAPI int32_t U_EXPORT2
861	uhash_hashChars(const UHashTok key) {
862	const char s = (const* char *)key.pointer;
863	return s == NULL ? `0` : static_cast<int32_t>(ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s))));
864	}
865
866	U_CAPI int32_t U_EXPORT2
867	uhash_hashIChars(const UHashTok key) {
868	const char s = (const* char *)key.pointer;
869	return s == NULL ? `0` : ustr_hashICharsN(s, static_cast<int32_t>(uprv_strlen(s)));
870	}
871
872	U_CAPI UBool U_EXPORT2
873	uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
874	int32_t count1, count2, pos, i;
875
876	if(hash1==hash2){
877	return TRUE;
878	}
879
880	/*
881	* Make sure that we are comparing 2 valid hashes of the same type
882	* with valid comparison functions.
883	* Without valid comparison functions, a binary comparison
884	* of the hash values will yield random results on machines
885	* with 64-bit pointers and 32-bit integer hashes.
886	* A valueComparator is normally optional.
887	*/
888	if (hash1==NULL \|\| hash2==NULL \|\|
889	hash1->keyComparator != hash2->keyComparator \|\|
890	hash1->valueComparator != hash2->valueComparator \|\|
891	hash1->valueComparator == NULL)
892	{
893	/*
894	Normally we would return an error here about incompatible hash tables,
895	but we return FALSE instead.
896	*/
897	return FALSE;
898	}
899
900	count1 = uhash_count(hash1);
901	count2 = uhash_count(hash2);
902	if(count1!=count2){
903	return FALSE;
904	}
905
906	pos=UHASH_FIRST;
907	for(i=`0`; i<count1; i++){
908	const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
909	const UHashTok key1 = elem1->key;
910	const UHashTok val1 = elem1->value;
911	/ here the keys are not compared, instead the key form hash1 is used to fetch*
912	* value from hash2. If the hashes are equal then then both hashes should
913	* contain equal values for the same key!
914	*/
915	const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
916	const UHashTok val2 = elem2->value;
917	if(hash1->valueComparator(val1, val2)==FALSE){
918	return FALSE;
919	}
920	}
921	return TRUE;
922	}
923
924	/********************************************************************
925	* PUBLIC Comparator Functions
926	********************************************************************/
927
928	U_CAPI UBool U_EXPORT2
929	uhash_compareUChars(const UHashTok key1, const UHashTok key2) {
930	const UChar p1 = (const* UChar*) key1.pointer;
931	const UChar p2 = (const* UChar*) key2.pointer;
932	if (p1 == p2) {
933	return TRUE;
934	}
935	if (p1 == NULL \|\| p2 == NULL) {
936	return FALSE;
937	}
938	while (p1 != `0` && p1 == *p2) {
939	++p1;
940	++p2;
941	}
942	return (UBool)(p1 == p2);
943	}
944
945	U_CAPI UBool U_EXPORT2
946	uhash_compareChars(const UHashTok key1, const UHashTok key2) {
947	const char p1 = (const* char*) key1.pointer;
948	const char p2 = (const* char*) key2.pointer;
949	if (p1 == p2) {
950	return TRUE;
951	}
952	if (p1 == NULL \|\| p2 == NULL) {
953	return FALSE;
954	}
955	while (p1 != `0` && p1 == *p2) {
956	++p1;
957	++p2;
958	}
959	return (UBool)(p1 == p2);
960	}
961
962	U_CAPI UBool U_EXPORT2
963	uhash_compareIChars(const UHashTok key1, const UHashTok key2) {
964	const char p1 = (const* char*) key1.pointer;
965	const char p2 = (const* char*) key2.pointer;
966	if (p1 == p2) {
967	return TRUE;
968	}
969	if (p1 == NULL \|\| p2 == NULL) {
970	return FALSE;
971	}
972	while (p1 != `0` && uprv_tolower(p1) == uprv_tolower(*p2)) {
973	++p1;
974	++p2;
975	}
976	return (UBool)(p1 == p2);
977	}
978
979	/********************************************************************
980	* PUBLIC int32_t Support Functions
981	********************************************************************/
982
983	U_CAPI int32_t U_EXPORT2
984	uhash_hashLong(const UHashTok key) {
985	return key.integer;
986	}
987
988	U_CAPI UBool U_EXPORT2
989	uhash_compareLong(const UHashTok key1, const UHashTok key2) {
990	return (UBool)(key1.integer == key2.integer);
991	}
992

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/uhash.cpp