1/* Copyright (c) 2006, 2018, Oracle and/or its affiliates.
2 Copyright (c) 2009, 2018, MariaDB
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17/*
18 extensible hash
19
20 TODO
21 try to get rid of dummy nodes ?
22 for non-unique hash, count only _distinct_ values
23 (but how to do it in lf_hash_delete ?)
24*/
25#include <my_global.h>
26#include <m_string.h>
27#include <my_sys.h>
28#include <mysys_err.h>
29#include <my_bit.h>
30#include <lf.h>
31
/*
  An element of the lock-free sorted list that underlies the hash.

  Elements are ordered by 'hashnr' (a bit-reversed hash value, see
  my_reverse_bits() usage below), so each bucket's dummy node is a
  natural split point of the single underlying list.
*/
typedef struct {
  intptr volatile link; /* a pointer to the next element in a list and a flag */
  uint32 hashnr;        /* reversed hash number, for sorting */
  const uchar *key;     /* search key; points into the element data */
  size_t keylen;        /* length of the key in bytes */
  /*
    data is stored here, directly after the keylen.
    thus the pointer to data is (void*)(slist_element_ptr+1)
  */
} LF_SLIST;
43
44const int LF_HASH_OVERHEAD= sizeof(LF_SLIST);
45
/*
  a structure to pass the context (pointers to the three successive elements
  in a list) from l_find to l_insert/l_delete

  'prev' points to the link field that references 'curr';
  'next' is the successor of 'curr'.
*/
typedef struct {
  intptr volatile *prev;
  LF_SLIST *curr, *next;
} CURSOR;
54
55/*
56 the last bit in LF_SLIST::link is a "deleted" flag.
57 the helper macros below convert it to a pure pointer or a pure flag
58*/
59#define PTR(V) (LF_SLIST *)((V) & (~(intptr)1))
60#define DELETED(V) ((V) & 1)
61
62/** walk the list, searching for an element or invoking a callback
63
64 Search for hashnr/key/keylen in the list starting from 'head' and
65 position the cursor. The list is ORDER BY hashnr, key
66
67 @param head start walking the list from this node
68 @param cs charset for comparing keys, NULL if callback is used
69 @param hashnr hash number to search for
70 @param key key to search for OR data for the callback
71 @param keylen length of the key to compare, 0 if callback is used
72 @param cursor for returning the found element
73 @param pins see lf_alloc-pin.c
74 @param callback callback action, invoked for every element
75
76 @note
77 cursor is positioned in either case
78 pins[0..2] are used, they are NOT removed on return
79 callback might see some elements twice (because of retries)
80
81 @return
82 if find: 0 - not found
83 1 - found
84 if callback:
85 0 - ok
    1 - error (callback returned 1)
87*/
static int l_find(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
                  const uchar *key, size_t keylen, CURSOR *cursor, LF_PINS *pins,
                  my_hash_walk_action callback)
{
  uint32 cur_hashnr;
  const uchar *cur_key;
  size_t cur_keylen;
  intptr link;

  /* exactly one mode is active: search (cs/keylen set) or iterate (callback set) */
  DBUG_ASSERT(!cs || !callback);        /* should not be set both */
  DBUG_ASSERT(!keylen || !callback);    /* should not be set both */

retry:
  cursor->prev= (intptr *)head;
  do { /* PTR() isn't necessary below, head is a dummy node */
    cursor->curr= (LF_SLIST *)(*cursor->prev);
    lf_pin(pins, 1, cursor->curr);
    /* pin-validate loop: re-read until the pinned pointer is stable */
  } while (my_atomic_loadptr((void**)cursor->prev) != cursor->curr &&
           LF_BACKOFF());
  for (;;)
  {
    if (unlikely(!cursor->curr))
      return 0; /* end of the list */

    cur_hashnr= cursor->curr->hashnr;
    cur_keylen= cursor->curr->keylen;
    cur_key= cursor->curr->key;

    /* pin the successor, re-reading 'link' until it is stable */
    do {
      link= cursor->curr->link;
      cursor->next= PTR(link);
      lf_pin(pins, 0, cursor->next);
    } while (link != cursor->curr->link && LF_BACKOFF());

    if (!DELETED(link))
    {
      if (unlikely(callback))
      {
        /* odd hashnr == normal node; pass its data (stored after the header) */
        if (cur_hashnr & 1 && callback(cursor->curr + 1, (void*)key))
          return 1;
      }
      else if (cur_hashnr >= hashnr)
      {
        /* list is ORDER BY (hashnr, key): we can stop as soon as we pass it */
        int r= 1;
        if (cur_hashnr > hashnr ||
            (r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0)
          return !r;
      }
      cursor->prev= &(cursor->curr->link);
      if (!(cur_hashnr & 1)) /* dummy node */
        head= (LF_SLIST **)cursor->prev;
      lf_pin(pins, 2, cursor->curr);
    }
    else
    {
      /*
        we found a deleted node - be nice, help the other thread
        and remove this deleted node
      */
      if (my_atomic_casptr((void **) cursor->prev,
                           (void **) &cursor->curr, cursor->next) && LF_BACKOFF())
        lf_alloc_free(pins, cursor->curr);
      else
        goto retry;
    }
    cursor->curr= cursor->next;
    lf_pin(pins, 1, cursor->curr);
  }
}
157
158/*
159 DESCRIPTION
160 insert a 'node' in the list that starts from 'head' in the correct
161 position (as found by l_find)
162
163 RETURN
164 0 - inserted
165 not 0 - a pointer to a duplicate (not pinned and thus unusable)
166
167 NOTE
168 it uses pins[0..2], on return all pins are removed.
169 if there're nodes with the same key value, a new node is added before them.
170*/
static LF_SLIST *l_insert(LF_SLIST * volatile *head, CHARSET_INFO *cs,
                          LF_SLIST *node, LF_PINS *pins, uint flags)
{
  CURSOR cursor;
  int res;

  for (;;)
  {
    if (l_find(head, cs, node->hashnr, node->key, node->keylen,
               &cursor, pins, 0) &&
        (flags & LF_HASH_UNIQUE))
    {
      res= 0; /* duplicate found */
      break;
    }
    else
    {
      /* found the insertion point: link the new node in before cursor.curr */
      node->link= (intptr)cursor.curr;
      DBUG_ASSERT(node->link != (intptr)node); /* no circular references */
      DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */
      if (my_atomic_casptr((void **) cursor.prev,
                           (void **)(char*) &cursor.curr, node))
      {
        res= 1; /* inserted ok */
        break;
      }
      /* CAS failed - the list changed under us, search again */
    }
  }
  lf_unpin(pins, 0);
  lf_unpin(pins, 1);
  lf_unpin(pins, 2);
  /*
    Note that cursor.curr is not pinned here and the pointer is unreliable,
    the object may disappear anytime. But if it points to a dummy node, the
    pointer is safe, because dummy nodes are never freed - initialize_bucket()
    uses this fact.
  */
  return res ? 0 : cursor.curr;
}
210
211/*
212 DESCRIPTION
    deletes a node as identified by hashnr/key/keylen from the list
214 that starts from 'head'
215
216 RETURN
217 0 - ok
218 1 - not found
219
220 NOTE
221 it uses pins[0..2], on return all pins are removed.
222*/
static int l_delete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
                    const uchar *key, uint keylen, LF_PINS *pins)
{
  CURSOR cursor;
  int res;

  for (;;)
  {
    if (!l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0))
    {
      res= 1; /* not found */
      break;
    }
    else
    {
      /*
        two-step deletion: first mark the node logically deleted by
        setting the low bit of its link, then physically unlink it
      */
      /* mark the node deleted */
      if (my_atomic_casptr((void **) (char*) &(cursor.curr->link),
                           (void **) (char*) &cursor.next,
                           (void *)(((intptr)cursor.next) | 1)))
      {
        /* and remove it from the list */
        if (my_atomic_casptr((void **)cursor.prev,
                             (void **)(char*)&cursor.curr, cursor.next))
          lf_alloc_free(pins, cursor.curr);
        else
        {
          /*
            somebody already "helped" us and removed the node ?
            Let's check if we need to help that someone too!
            (to ensure the number of "set DELETED flag" actions
            is equal to the number of "remove from the list" actions)
          */
          l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0);
        }
        res= 0;
        break;
      }
      /* mark-CAS failed: the node changed under us, retry the search */
    }
  }
  lf_unpin(pins, 0);
  lf_unpin(pins, 1);
  lf_unpin(pins, 2);
  return res;
}
267
268/*
269 DESCRIPTION
    searches for a node as identified by hashnr/key/keylen in the list
271 that starts from 'head'
272
273 RETURN
274 0 - not found
275 node - found
276
277 NOTE
278 it uses pins[0..2], on return the pin[2] keeps the node found
279 all other pins are removed.
280*/
static LF_SLIST *l_search(LF_SLIST * volatile *head, CHARSET_INFO *cs,
                          uint32 hashnr, const uchar *key, uint keylen,
                          LF_PINS *pins)
{
  CURSOR cursor;
  int res= l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0);
  if (res)
    lf_pin(pins, 2, cursor.curr);  /* keep the found node pinned for the caller */
  else
    lf_unpin(pins, 2);
  /* pins 0 and 1 are released in either case */
  lf_unpin(pins, 1);
  lf_unpin(pins, 0);
  return res ? cursor.curr : 0;
}
295
296static inline const uchar* hash_key(const LF_HASH *hash,
297 const uchar *record, size_t *length)
298{
299 if (hash->get_key)
300 return (*hash->get_key)(record, length, 0);
301 *length= hash->key_length;
302 return record + hash->key_offset;
303}
304
305/*
306 Compute the hash key value from the raw key.
307
308 @note, that the hash value is limited to 2^31, because we need one
309 bit to distinguish between normal and dummy nodes.
310*/
311static inline my_hash_value_type calc_hash(CHARSET_INFO *cs,
312 const uchar *key,
313 size_t keylen)
314{
315 ulong nr1= 1, nr2= 4;
316 cs->coll->hash_sort(cs, (uchar*) key, keylen, &nr1, &nr2);
317 return nr1;
318}
319
320#define MAX_LOAD 1.0 /* average number of elements in a bucket */
321
322static int initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *);
323
324static void default_initializer(LF_HASH *hash, void *dst, const void *src)
325{
326 memcpy(dst, src, hash->element_size);
327}
328
329/*
330 Initializes lf_hash, the arguments are compatible with hash_init
331
332 @note element_size sets both the size of allocated memory block for
333 lf_alloc and a size of memcpy'ed block size in lf_hash_insert. Typically
334 they are the same, indeed. But LF_HASH::element_size can be decreased
335 after lf_hash_init, and then lf_alloc will allocate larger block that
336 lf_hash_insert will copy over. It is desirable if part of the element
337 is expensive to initialize - for example if there is a mutex or
338 DYNAMIC_ARRAY. In this case they should be initialize in the
339 LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them.
340
341 The above works well with PODS. For more complex cases (e.g. C++ classes
342 with private members) use initializer function.
343*/
344void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
345 uint key_offset, uint key_length, my_hash_get_key get_key,
346 CHARSET_INFO *charset)
347{
348 lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
349 offsetof(LF_SLIST, key));
350 lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));
351 hash->size= 1;
352 hash->count= 0;
353 hash->element_size= element_size;
354 hash->flags= flags;
355 hash->charset= charset ? charset : &my_charset_bin;
356 hash->key_offset= key_offset;
357 hash->key_length= key_length;
358 hash->get_key= get_key;
359 hash->initializer= default_initializer;
360 hash->hash_function= calc_hash;
361 DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
362}
363
364void lf_hash_destroy(LF_HASH *hash)
365{
366 LF_SLIST *el, **head= (LF_SLIST **)lf_dynarray_value(&hash->array, 0);
367
368 if (head)
369 {
370 el= *head;
371 while (el)
372 {
373 intptr next= el->link;
374 if (el->hashnr & 1)
375 lf_alloc_direct_free(&hash->alloc, el); /* normal node */
376 else
377 my_free(el); /* dummy node */
378 el= (LF_SLIST *)next;
379 }
380 }
381 lf_alloc_destroy(&hash->alloc);
382 lf_dynarray_destroy(&hash->array);
383}
384
385/*
386 DESCRIPTION
387 inserts a new element to a hash. it will have a _copy_ of
388 data, not a pointer to it.
389
390 RETURN
391 0 - inserted
392 1 - didn't (unique key conflict)
393 -1 - out of memory
394
395 NOTE
396 see l_insert() for pin usage notes
397*/
398int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
399{
400 int csize, bucket, hashnr;
401 LF_SLIST *node, * volatile *el;
402
403 node= (LF_SLIST *)lf_alloc_new(pins);
404 if (unlikely(!node))
405 return -1;
406 hash->initializer(hash, node + 1, data);
407 node->key= hash_key(hash, (uchar *)(node+1), &node->keylen);
408 hashnr= hash->hash_function(hash->charset, node->key, node->keylen) & INT_MAX32;
409 bucket= hashnr % hash->size;
410 el= lf_dynarray_lvalue(&hash->array, bucket);
411 if (unlikely(!el))
412 return -1;
413 if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
414 return -1;
415 node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */
416 if (l_insert(el, hash->charset, node, pins, hash->flags))
417 {
418 lf_alloc_free(pins, node);
419 return 1;
420 }
421 csize= hash->size;
422 if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD)
423 my_atomic_cas32(&hash->size, &csize, csize*2);
424 return 0;
425}
426
427/*
428 DESCRIPTION
429 deletes an element with the given key from the hash (if a hash is
430 not unique and there're many elements with this key - the "first"
431 matching element is deleted)
432 RETURN
433 0 - deleted
434 1 - didn't (not found)
435 NOTE
436 see l_delete() for pin usage notes
437*/
int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
{
  LF_SLIST * volatile *el;
  uint bucket, hashnr;

  /* mask to 2^31: one bit is reserved to tell normal from dummy nodes */
  hashnr= hash->hash_function(hash->charset, (uchar *)key, keylen) & INT_MAX32;

  /* hide OOM errors - if we cannot initialize a bucket, try the previous one */
  for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
  {
    el= lf_dynarray_lvalue(&hash->array, bucket);
    if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
      break;
    if (unlikely(bucket == 0))
      return 1; /* if there's no bucket==0, the hash is empty */
  }
  /* normal nodes carry the low bit of the reversed hash - see lf_hash_insert() */
  if (l_delete(el, hash->charset, my_reverse_bits(hashnr) | 1,
               (uchar *)key, keylen, pins))
  {
    return 1;
  }
  my_atomic_add32(&hash->count, -1);
  return 0;
}
462
463/*
464 RETURN
465 a pointer to an element with the given key (if a hash is not unique and
466 there're many elements with this key - the "first" matching element)
467 NULL if nothing is found
468
469 NOTE
470 see l_search() for pin usage notes
471*/
void *lf_hash_search_using_hash_value(LF_HASH *hash, LF_PINS *pins,
                                      my_hash_value_type hashnr,
                                      const void *key, uint keylen)
{
  LF_SLIST * volatile *el, *found;
  uint bucket;

  /* hide OOM errors - if we cannot initialize a bucket, try the previous one */
  for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
  {
    el= lf_dynarray_lvalue(&hash->array, bucket);
    if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
      break;
    if (unlikely(bucket == 0))
      return 0; /* if there's no bucket==0, the hash is empty */
  }
  /* search for a normal node (low bit set); found node stays pinned (pin 2) */
  found= l_search(el, hash->charset, my_reverse_bits(hashnr) | 1,
                  (uchar *)key, keylen, pins);
  /* +1 skips the LF_SLIST header: element data is stored right after it */
  return found ? found+1 : 0;
}
492
493
494/**
495 Iterate over all elements in hash and call function with the element
496
497 @note
498 If one of 'action' invocations returns 1 the iteration aborts.
499 'action' might see some elements twice!
500
501 @retval 0 ok
502 @retval 1 error (action returned 1)
503*/
int lf_hash_iterate(LF_HASH *hash, LF_PINS *pins,
                    my_hash_walk_action action, void *argument)
{
  CURSOR cursor;
  uint bucket= 0;
  int res;
  LF_SLIST * volatile *el;

  el= lf_dynarray_lvalue(&hash->array, bucket);
  if (unlikely(!el))
    return 0; /* if there's no bucket==0, the hash is empty */
  if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
    return 0; /* if there's no bucket==0, the hash is empty */

  /* walk the whole list from bucket 0, invoking 'action' on every normal node */
  res= l_find(el, 0, 0, (uchar*)argument, 0, &cursor, pins, action);

  lf_unpin(pins, 2);
  lf_unpin(pins, 1);
  lf_unpin(pins, 0);
  return res;
}
525
526void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
527{
528 return lf_hash_search_using_hash_value(hash, pins,
529 hash->hash_function(hash->charset,
530 (uchar*) key,
531 keylen) & INT_MAX32,
532 key, keylen);
533}
534
535static const uchar *dummy_key= (uchar*)"";
536
537/*
538 RETURN
539 0 - ok
540 -1 - out of memory
541*/
542static int initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node,
543 uint bucket, LF_PINS *pins)
544{
545 uint parent= my_clear_highest_bit(bucket);
546 LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME));
547 LF_SLIST **tmp= 0, *cur;
548 LF_SLIST * volatile *el= lf_dynarray_lvalue(&hash->array, parent);
549 if (unlikely(!el || !dummy))
550 return -1;
551 if (*el == NULL && bucket &&
552 unlikely(initialize_bucket(hash, el, parent, pins)))
553 {
554 my_free(dummy);
555 return -1;
556 }
557 dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */
558 dummy->key= dummy_key;
559 dummy->keylen= 0;
560 if ((cur= l_insert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE)))
561 {
562 my_free(dummy);
563 dummy= cur;
564 }
565 my_atomic_casptr((void **)node, (void **)(char*) &tmp, dummy);
566 /*
567 note that if the CAS above failed (after l_insert() succeeded),
568 it would mean that some other thread has executed l_insert() for
569 the same dummy node, its l_insert() failed, it picked up our
570 dummy node (in "dummy= cur") and executed the same CAS as above.
571 Which means that even if CAS above failed we don't need to retry,
572 and we should not free(dummy) - there's no memory leak here
573 */
574 return 0;
575}
576