hashtab.c source code [neovim/src/nvim/hashtab.c]

1	// This is an open source non-commercial project. Dear PVS-Studio, please check
2	// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
3
4	/// @file hashtab.c
5	///
6	/// Handling of a hashtable with Vim-specific properties.
7	///
8	/// Each item in a hashtable has a NUL terminated string key. A key can appear
9	/// only once in the table.
10	///
11	/// A hash number is computed from the key for quick lookup. When the hashes
12	/// of two different keys point to the same entry an algorithm is used to
13	/// iterate over other entries in the table until the right one is found.
14	/// To make the iteration work removed keys are different from entries where a
15	/// key was never present.
16	///
17	/// The mechanism has been partly based on how Python Dictionaries are
18	/// implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4.
19	///
20	/// The hashtable grows to accommodate more entries when needed. At least 1/3
21	/// of the entries is empty to keep the lookup efficient (at the cost of extra
22	/// memory).
23
24	#include <assert.h>
25	#include <stdbool.h>
26	#include <string.h>
27	#include <inttypes.h>
28
29	#include "nvim/vim.h"
30	#include "nvim/ascii.h"
31	#include "nvim/hashtab.h"
32	#include "nvim/message.h"
33	#include "nvim/memory.h"
34
35	// Magic value for algorithm that walks through the array.
36	#define PERTURB_SHIFT 5
37
38	#ifdef INCLUDE_GENERATED_DECLARATIONS
39	# include "hashtab.c.generated.h"
40	#endif
41
42	char hash_removed;
43
44	/// Initialize an empty hash table.
45	void hash_init(hashtab_T *ht)
46	{
47	// This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray".
48	memset(ht, `0`, sizeof(hashtab_T));
49	ht->ht_array = ht->ht_smallarray;
50	ht->ht_mask = HT_INIT_SIZE - `1`;
51	}
52
53	/// Free the array of a hash table without freeing contained values.
54	///
55	/// If "ht" is not freed (after calling this) then you should call hash_init()
56	/// right next!
57	void hash_clear(hashtab_T *ht)
58	{
59	if (ht->ht_array != ht->ht_smallarray) {
60	xfree(ht->ht_array);
61	}
62	}
63
64	/// Free the array of a hash table and all contained values.
65	///
66	/// @param off the offset from start of value to start of key (@see hashitem_T).
67	void hash_clear_all(hashtab_T ht, unsigned* int off)
68	{
69	size_t todo = ht->ht_used;
70	for (hashitem_T *hi = ht->ht_array; todo > `0`; ++hi) {
71	if (!HASHITEM_EMPTY(hi)) {
72	xfree(hi->hi_key - off);
73	todo--;
74	}
75	}
76	hash_clear(ht);
77	}
78
79	/// Find item for given "key" in hashtable "ht".
80	///
81	/// @param key The key of the looked-for item. Must not be NULL.
82	///
83	/// @return Pointer to the hash item corresponding to the given key.
84	/// If not found, then return pointer to the empty item that would be
85	/// used for that key.
86	/// WARNING: Returned pointer becomes invalid as soon as the hash table
87	/// is changed in any way.
88	hashitem_T hash_find(const* hashtab_T *const ht, const char_u *const key)
89	{
90	return hash_lookup(ht, (const char *)key, STRLEN(key), hash_hash(key));
91	}
92
93	/// Like hash_find, but key is not NUL-terminated
94	///
95	/// @param[in] ht Hashtab to look in.
96	/// @param[in] key Key of the looked-for item. Must not be NULL.
97	/// @param[in] len Key length.
98	///
99	/// @return Pointer to the hash item corresponding to the given key.
100	/// If not found, then return pointer to the empty item that would be
101	/// used for that key.
102	///
103	/// @warning Returned pointer becomes invalid as soon as the hash table
104	/// is changed in any way.
105	hashitem_T hash_find_len(const* hashtab_T *const ht, const char *const key,
106	const size_t len)
107	{
108	return hash_lookup(ht, key, len, hash_hash_len(key, len));
109	}
110
111	/// Like hash_find(), but caller computes "hash".
112	///
113	/// @param[in] key The key of the looked-for item. Must not be NULL.
114	/// @param[in] key_len Key length.
115	/// @param[in] hash The precomputed hash for the key.
116	///
117	/// @return Pointer to the hashitem corresponding to the given key.
118	/// If not found, then return pointer to the empty item that would be
119	/// used for that key.
120	/// WARNING: Returned pointer becomes invalid as soon as the hash table
121	/// is changed in any way.
122	hashitem_T hash_lookup(const* hashtab_T *const ht,
123	const char *const key, const size_t key_len,
124	const hash_T hash)
125	{
126	#ifdef HT_DEBUG
127	hash_count_lookup++;
128	#endif // ifdef HT_DEBUG
129
130	// Quickly handle the most common situations:
131	// - return if there is no item at all
132	// - skip over a removed item
133	// - return if the item matches
134	hash_T idx = hash & ht->ht_mask;
135	hashitem_T *hi = &ht->ht_array[idx];
136
137	if (hi->hi_key == NULL) {
138	return hi;
139	}
140
141	hashitem_T *freeitem = NULL;
142	if (hi->hi_key == HI_KEY_REMOVED) {
143	freeitem = hi;
144	} else if ((hi->hi_hash == hash)
145	&& (STRNCMP(hi->hi_key, key, key_len) == `0`)
146	&& hi->hi_key[key_len] == NUL) {
147	return hi;
148	}
149
150	// Need to search through the table to find the key. The algorithm
151	// to step through the table starts with large steps, gradually becoming
152	// smaller down to (1/4 table size + 1). This means it goes through all
153	// table entries in the end.
154	// When we run into a NULL key it's clear that the key isn't there.
155	// Return the first available slot found (can be a slot of a removed
156	// item).
157	for (hash_T perturb = hash;; perturb >>= PERTURB_SHIFT) {
158	#ifdef HT_DEBUG
159	// count a "miss" for hashtab lookup
160	hash_count_perturb++;
161	#endif // ifdef HT_DEBUG
162	idx = `5` * idx + perturb + `1`;
163	hi = &ht->ht_array[idx & ht->ht_mask];
164
165	if (hi->hi_key == NULL) {
166	return freeitem == NULL ? hi : freeitem;
167	}
168
169	if ((hi->hi_hash == hash)
170	&& (hi->hi_key != HI_KEY_REMOVED)
171	&& (STRNCMP(hi->hi_key, key, key_len) == `0`)
172	&& hi->hi_key[key_len] == NUL) {
173	return hi;
174	}
175
176	if ((hi->hi_key == HI_KEY_REMOVED) && (freeitem == NULL)) {
177	freeitem = hi;
178	}
179	}
180	}
181
182	/// Print the efficiency of hashtable lookups.
183	///
184	/// Useful when trying different hash algorithms.
185	/// Called when exiting.
186	void hash_debug_results(void)
187	{
188	#ifdef HT_DEBUG
189	fprintf(stderr, "\r\n\r\n\r\n\r\n");
190	fprintf(stderr, "Number of hashtable lookups: %" PRId64 "\r\n",
191	(int64_t)hash_count_lookup);
192	fprintf(stderr, "Number of perturb loops: %" PRId64 "\r\n",
193	(int64_t)hash_count_perturb);
194	fprintf(stderr, "Percentage of perturb loops: %" PRId64 "%%\r\n",
195	(int64_t)(hash_count_perturb * `100` / hash_count_lookup));
196	#endif // ifdef HT_DEBUG
197	}
198
199	/// Add item for key "key" to hashtable "ht".
200	///
201	/// @param key Pointer to the key for the new item. The key has to be contained
202	/// in the new item (@see hashitem_T). Must not be NULL.
203	///
204	/// @return OK if success.
205	/// FAIL if key already present
206	int hash_add(hashtab_T ht, char_u key)
207	{
208	hash_T hash = hash_hash(key);
209	hashitem_T hi = hash_lookup(ht, (const* char *)key, STRLEN(key), hash);
210	if (!HASHITEM_EMPTY(hi)) {
211	internal_error("hash_add()");
212	return FAIL;
213	}
214	hash_add_item(ht, hi, key, hash);
215	return OK;
216	}
217
218	/// Add item "hi" for key "key" to hashtable "ht".
219	///
220	/// @param hi The hash item to be used. Must have been obtained through
221	/// hash_lookup() and point to an empty item.
222	/// @param key Pointer to the key for the new item. The key has to be contained
223	/// in the new item (@see hashitem_T). Must not be NULL.
224	/// @param hash The precomputed hash value for the key.
225	void hash_add_item(hashtab_T ht, hashitem_T hi, char_u *key, hash_T hash)
226	{
227	ht->ht_used++;
228	if (hi->hi_key == NULL) {
229	ht->ht_filled++;
230	}
231	hi->hi_key = key;
232	hi->hi_hash = hash;
233
234	// When the space gets low may resize the array.
235	hash_may_resize(ht, `0`);
236	}
237
238	/// Remove item "hi" from hashtable "ht".
239	///
240	/// Caller must take care of freeing the item itself.
241	///
242	/// @param hi The hash item to be removed.
243	/// It must have been obtained with hash_lookup().
244	void hash_remove(hashtab_T ht, hashitem_T hi)
245	{
246	ht->ht_used--;
247	hi->hi_key = HI_KEY_REMOVED;
248	hash_may_resize(ht, `0`);
249	}
250
251	/// Lock hashtable (prevent changes in ht_array).
252	///
253	/// Don't use this when items are to be added!
254	/// Must call hash_unlock() later.
255	void hash_lock(hashtab_T *ht)
256	{
257	ht->ht_locked++;
258	}
259
260	/// Unlock hashtable (allow changes in ht_array again).
261	///
262	/// Table will be resized (shrunk) when necessary.
263	/// This must balance a call to hash_lock().
264	void hash_unlock(hashtab_T *ht)
265	{
266	ht->ht_locked--;
267	hash_may_resize(ht, `0`);
268	}
269
270	/// Resize hastable (new size can be given or automatically computed).
271	///
272	/// @param minitems Minimum number of items the new table should hold.
273	/// If zero, new size will depend on currently used items:
274	/// - Shrink when too much empty space.
275	/// - Grow when not enough empty space.
276	/// If non-zero, passed minitems will be used.
277	static void hash_may_resize(hashtab_T *ht, size_t minitems)
278	{
279	// Don't resize a locked table.
280	if (ht->ht_locked > `0`) {
281	return;
282	}
283
284	#ifdef HT_DEBUG
285	if (ht->ht_used > ht->ht_filled) {
286	EMSG("hash_may_resize(): more used than filled");
287	}
288
289	if (ht->ht_filled >= ht->ht_mask + `1`) {
290	EMSG("hash_may_resize(): table completely filled");
291	}
292	#endif // ifdef HT_DEBUG
293
294	size_t minsize;
295	if (minitems == `0`) {
296	// Return quickly for small tables with at least two NULL items.
297	// items are required for the lookup to decide a key isn't there.
298	if ((ht->ht_filled < HT_INIT_SIZE - `1`)
299	&& (ht->ht_array == ht->ht_smallarray)) {
300	return;
301	}
302
303	// Grow or refill the array when it's more than 2/3 full (including
304	// removed items, so that they get cleaned up).
305	// Shrink the array when it's less than 1/5 full. When growing it is
306	// at least 1/4 full (avoids repeated grow-shrink operations)
307	size_t oldsize = ht->ht_mask + `1`;
308	if ((ht->ht_filled * `3` < oldsize * `2`) && (ht->ht_used > oldsize / `5`)) {
309	return;
310	}
311
312	if (ht->ht_used > `1000`) {
313	// it's big, don't make too much room
314	minsize = ht->ht_used * `2`;
315	} else {
316	// make plenty of room
317	minsize = ht->ht_used * `4`;
318	}
319	} else {
320	// Use specified size.
321	if (minitems < ht->ht_used) {
322	// just in case...
323	minitems = ht->ht_used;
324	}
325	// array is up to 2/3 full
326	minsize = minitems * `3` / `2`;
327	}
328
329	size_t newsize = HT_INIT_SIZE;
330	while (newsize < minsize) {
331	// make sure it's always a power of 2
332	newsize <<= `1`;
333	// assert newsize didn't overflow
334	assert(newsize != `0`);
335	}
336
337	bool newarray_is_small = newsize == HT_INIT_SIZE;
338	bool keep_smallarray = newarray_is_small
339	&& ht->ht_array == ht->ht_smallarray;
340
341	// Make sure that oldarray and newarray do not overlap,
342	// so that copying is possible.
343	hashitem_T temparray[HT_INIT_SIZE];
344	hashitem_T *oldarray = keep_smallarray
345	? memcpy(temparray, ht->ht_smallarray, sizeof(temparray))
346	: ht->ht_array;
347	hashitem_T *newarray = newarray_is_small
348	? ht->ht_smallarray
349	: xmalloc(sizeof(hashitem_T) * newsize);
350
351	memset(newarray, `0`, sizeof(hashitem_T) * newsize);
352
353	// Move all the items from the old array to the new one, placing them in
354	// the right spot. The new array won't have any removed items, thus this
355	// is also a cleanup action.
356	hash_T newmask = newsize - `1`;
357	size_t todo = ht->ht_used;
358
359	for (hashitem_T *olditem = oldarray; todo > `0`; ++olditem) {
360	if (HASHITEM_EMPTY(olditem)) {
361	continue;
362	}
363	// The algorithm to find the spot to add the item is identical to
364	// the algorithm to find an item in hash_lookup(). But we only
365	// need to search for a NULL key, thus it's simpler.
366	hash_T newi = olditem->hi_hash & newmask;
367	hashitem_T *newitem = &newarray[newi];
368	if (newitem->hi_key != NULL) {
369	for (hash_T perturb = olditem->hi_hash;; perturb >>= PERTURB_SHIFT) {
370	newi = `5` * newi + perturb + `1`;
371	newitem = &newarray[newi & newmask];
372	if (newitem->hi_key == NULL) {
373	break;
374	}
375	}
376	}
377	newitem = olditem;
378	todo--;
379	}
380
381	if (ht->ht_array != ht->ht_smallarray) {
382	xfree(ht->ht_array);
383	}
384	ht->ht_array = newarray;
385	ht->ht_mask = newmask;
386	ht->ht_filled = ht->ht_used;
387	}
388
389	#define HASH_CYCLE_BODY(hash, p) \
390	hash = hash * 101 + *p++
391
392	/// Get the hash number for a key.
393	///
394	/// If you think you know a better hash function: Compile with HT_DEBUG set and
395	/// run a script that uses hashtables a lot. Vim will then print statistics
396	/// when exiting. Try that with the current hash algorithm and yours. The
397	/// lower the percentage the better.
398	hash_T hash_hash(const char_u *key)
399	{
400	hash_T hash = *key;
401
402	if (hash == `0`) {
403	return (hash_T)`0`;
404	}
405
406	// A simplistic algorithm that appears to do very well.
407	// Suggested by George Reilly.
408	const uint8_t *p = key + `1`;
409	while (*p != NUL) {
410	HASH_CYCLE_BODY(hash, p);
411	}
412
413	return hash;
414	}
415
416	/// Get the hash number for a key that is not a NUL-terminated string
417	///
418	/// @warning Function does not check whether key contains NUL. But you will not
419	/// be able to get hash entry in this case.
420	///
421	/// @param[in] key Key.
422	/// @param[in] len Key length.
423	///
424	/// @return Key hash.
425	hash_T hash_hash_len(const char key, const* size_t len)
426	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
427	{
428	if (len == `0`) {
429	return `0`;
430	}
431
432	hash_T hash = (uint8_t )key;
433	const uint8_t end = (uint8_t )key + len;
434
435	const uint8_t p = (const* uint8_t *)key + `1`;
436	while (p < end) {
437	HASH_CYCLE_BODY(hash, p);
438	}
439
440	return hash;
441	}
442
443	#undef HASH_CYCLE_BODY
444
445	/// Function to get HI_KEY_REMOVED value
446	///
447	/// Used for testing because luajit ffi does not allow getting addresses of
448	/// globals.
449	const char_u _hash_key_removed(void*)
450	FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
451	{
452	return HI_KEY_REMOVED;
453	}
454

Browse the source code of neovim/src/nvim/hashtab.c