1 | /* Copyright (c) 2006, 2018, Oracle and/or its affiliates. |
2 | Copyright (c) 2009, 2018, MariaDB |
3 | |
4 | This program is free software; you can redistribute it and/or modify |
5 | it under the terms of the GNU General Public License as published by |
6 | the Free Software Foundation; version 2 of the License. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
16 | |
17 | /* |
18 | extensible hash |
19 | |
20 | TODO |
21 | try to get rid of dummy nodes ? |
22 | for non-unique hash, count only _distinct_ values |
23 | (but how to do it in lf_hash_delete ?) |
24 | */ |
25 | #include <my_global.h> |
26 | #include <m_string.h> |
27 | #include <my_sys.h> |
28 | #include <mysys_err.h> |
29 | #include <my_bit.h> |
30 | #include <lf.h> |
31 | |
32 | /* An element of the list */ |
typedef struct {
  intptr volatile link; /* a pointer to the next element in a list and a flag */
  uint32 hashnr; /* reversed hash number, for sorting */
  const uchar *key;     /* the search key (dummy nodes use an empty key) */
  size_t keylen;
  /*
    data is stored here, directly after the keylen.
    thus the pointer to data is (void*)(slist_element_ptr+1)
  */
} LF_SLIST;
43 | |
/* per-element space overhead of the hash: the list-node header */
const int LF_HASH_OVERHEAD= sizeof(LF_SLIST);
45 | |
46 | /* |
  a structure to pass the context (pointers to the three successive elements
  in a list) from l_find to l_insert/l_delete
49 | */ |
typedef struct {
  intptr volatile *prev; /* &head, or &link of the element before curr */
  LF_SLIST *curr, *next; /* current element and its successor */
} CURSOR;
54 | |
55 | /* |
56 | the last bit in LF_SLIST::link is a "deleted" flag. |
57 | the helper macros below convert it to a pure pointer or a pure flag |
58 | */ |
#define PTR(V) (LF_SLIST *)((V) & (~(intptr)1)) /* the pointer without the flag */
#define DELETED(V) ((V) & 1)                    /* the "deleted" flag alone */
61 | |
62 | /** walk the list, searching for an element or invoking a callback |
63 | |
64 | Search for hashnr/key/keylen in the list starting from 'head' and |
65 | position the cursor. The list is ORDER BY hashnr, key |
66 | |
67 | @param head start walking the list from this node |
68 | @param cs charset for comparing keys, NULL if callback is used |
69 | @param hashnr hash number to search for |
70 | @param key key to search for OR data for the callback |
71 | @param keylen length of the key to compare, 0 if callback is used |
72 | @param cursor for returning the found element |
73 | @param pins see lf_alloc-pin.c |
74 | @param callback callback action, invoked for every element |
75 | |
76 | @note |
77 | cursor is positioned in either case |
78 | pins[0..2] are used, they are NOT removed on return |
79 | callback might see some elements twice (because of retries) |
80 | |
81 | @return |
82 | if find: 0 - not found |
83 | 1 - found |
84 | if callback: |
85 | 0 - ok |
       1 - error (callback returned 1)
87 | */ |
static int l_find(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
                  const uchar *key, size_t keylen, CURSOR *cursor, LF_PINS *pins,
                  my_hash_walk_action callback)
{
  uint32 cur_hashnr;
  const uchar *cur_key;
  size_t cur_keylen;
  intptr link;

  DBUG_ASSERT(!cs || !callback); /* should not be set both */
  DBUG_ASSERT(!keylen || !callback); /* should not be set both */

retry:
  cursor->prev= (intptr *)head;
  /* load the first element and pin it; re-read until the pin is stable */
  do { /* PTR() isn't necessary below, head is a dummy node */
    cursor->curr= (LF_SLIST *)(*cursor->prev);
    lf_pin(pins, 1, cursor->curr);
  } while (my_atomic_loadptr((void**)cursor->prev) != cursor->curr &&
           LF_BACKOFF());
  for (;;)
  {
    if (unlikely(!cursor->curr))
      return 0; /* end of the list */

    /* curr is pinned, so its fields can be read safely */
    cur_hashnr= cursor->curr->hashnr;
    cur_keylen= cursor->curr->keylen;
    cur_key= cursor->curr->key;

    /* load and pin the successor; re-read until the pin is stable */
    do {
      link= cursor->curr->link;
      cursor->next= PTR(link);
      lf_pin(pins, 0, cursor->next);
    } while (link != cursor->curr->link && LF_BACKOFF());

    if (!DELETED(link))
    {
      if (unlikely(callback))
      {
        /* odd hashnr == normal node; only those carry user data */
        if (cur_hashnr & 1 && callback(cursor->curr + 1, (void*)key))
          return 1;
      }
      else if (cur_hashnr >= hashnr)
      {
        /* list is ORDER BY reversed-hashnr, key: we can stop here */
        int r= 1;
        if (cur_hashnr > hashnr ||
            (r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0)
          return !r;
      }
      cursor->prev= &(cursor->curr->link);
      if (!(cur_hashnr & 1)) /* dummy node */
        head= (LF_SLIST **)cursor->prev;
      lf_pin(pins, 2, cursor->curr);
    }
    else
    {
      /*
        we found a deleted node - be nice, help the other thread
        and remove this deleted node
      */
      if (my_atomic_casptr((void **) cursor->prev,
                           (void **) &cursor->curr, cursor->next) && LF_BACKOFF())
        lf_alloc_free(pins, cursor->curr);
      else
        goto retry; /* unlink failed - somebody else modified the list */
    }
    cursor->curr= cursor->next;
    lf_pin(pins, 1, cursor->curr);
  }
}
157 | |
158 | /* |
159 | DESCRIPTION |
160 | insert a 'node' in the list that starts from 'head' in the correct |
161 | position (as found by l_find) |
162 | |
163 | RETURN |
164 | 0 - inserted |
165 | not 0 - a pointer to a duplicate (not pinned and thus unusable) |
166 | |
167 | NOTE |
168 | it uses pins[0..2], on return all pins are removed. |
169 | if there're nodes with the same key value, a new node is added before them. |
170 | */ |
static LF_SLIST *l_insert(LF_SLIST * volatile *head, CHARSET_INFO *cs,
                          LF_SLIST *node, LF_PINS *pins, uint flags)
{
  CURSOR cursor;
  int res;

  for (;;)
  {
    if (l_find(head, cs, node->hashnr, node->key, node->keylen,
               &cursor, pins, 0) &&
        (flags & LF_HASH_UNIQUE))
    {
      res= 0; /* duplicate found */
      break;
    }
    else
    {
      /* link the new node in front of cursor.curr, then publish it */
      node->link= (intptr)cursor.curr;
      DBUG_ASSERT(node->link != (intptr)node); /* no circular references */
      DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */
      if (my_atomic_casptr((void **) cursor.prev,
                           (void **)(char*) &cursor.curr, node))
      {
        res= 1; /* inserted ok */
        break;
      }
      /* CAS failed - the list changed under us, search again */
    }
  }
  lf_unpin(pins, 0);
  lf_unpin(pins, 1);
  lf_unpin(pins, 2);
  /*
    Note that cursor.curr is not pinned here and the pointer is unreliable,
    the object may disappear anytime. But if it points to a dummy node, the
    pointer is safe, because dummy nodes are never freed - initialize_bucket()
    uses this fact.
  */
  return res ? 0 : cursor.curr;
}
210 | |
211 | /* |
212 | DESCRIPTION |
    deletes a node as identified by hashnr/key/keylen from the list
214 | that starts from 'head' |
215 | |
216 | RETURN |
217 | 0 - ok |
218 | 1 - not found |
219 | |
220 | NOTE |
221 | it uses pins[0..2], on return all pins are removed. |
222 | */ |
static int l_delete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
                    const uchar *key, uint keylen, LF_PINS *pins)
{
  CURSOR cursor;
  int res;

  for (;;)
  {
    if (!l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0))
    {
      res= 1; /* not found */
      break;
    }
    else
    {
      /* mark the node deleted (set the flag bit in its link) */
      if (my_atomic_casptr((void **) (char*) &(cursor.curr->link),
                           (void **) (char*) &cursor.next,
                           (void *)(((intptr)cursor.next) | 1)))
      {
        /* and remove it from the list */
        if (my_atomic_casptr((void **)cursor.prev,
                             (void **)(char*)&cursor.curr, cursor.next))
          lf_alloc_free(pins, cursor.curr);
        else
        {
          /*
            somebody already "helped" us and removed the node ?
            Let's check if we need to help that someone too!
            (to ensure the number of "set DELETED flag" actions
            is equal to the number of "remove from the list" actions)
          */
          l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0);
        }
        res= 0;
        break;
      }
      /* marking failed - the node's link changed; retry the whole search */
    }
  }
  lf_unpin(pins, 0);
  lf_unpin(pins, 1);
  lf_unpin(pins, 2);
  return res;
}
267 | |
268 | /* |
269 | DESCRIPTION |
    searches for a node as identified by hashnr/key/keylen in the list
271 | that starts from 'head' |
272 | |
273 | RETURN |
274 | 0 - not found |
275 | node - found |
276 | |
277 | NOTE |
278 | it uses pins[0..2], on return the pin[2] keeps the node found |
279 | all other pins are removed. |
280 | */ |
281 | static LF_SLIST *l_search(LF_SLIST * volatile *head, CHARSET_INFO *cs, |
282 | uint32 hashnr, const uchar *key, uint keylen, |
283 | LF_PINS *pins) |
284 | { |
285 | CURSOR cursor; |
286 | int res= l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0); |
287 | if (res) |
288 | lf_pin(pins, 2, cursor.curr); |
289 | else |
290 | lf_unpin(pins, 2); |
291 | lf_unpin(pins, 1); |
292 | lf_unpin(pins, 0); |
293 | return res ? cursor.curr : 0; |
294 | } |
295 | |
296 | static inline const uchar* hash_key(const LF_HASH *hash, |
297 | const uchar *record, size_t *length) |
298 | { |
299 | if (hash->get_key) |
300 | return (*hash->get_key)(record, length, 0); |
301 | *length= hash->key_length; |
302 | return record + hash->key_offset; |
303 | } |
304 | |
305 | /* |
306 | Compute the hash key value from the raw key. |
307 | |
308 | @note, that the hash value is limited to 2^31, because we need one |
309 | bit to distinguish between normal and dummy nodes. |
310 | */ |
311 | static inline my_hash_value_type calc_hash(CHARSET_INFO *cs, |
312 | const uchar *key, |
313 | size_t keylen) |
314 | { |
315 | ulong nr1= 1, nr2= 4; |
316 | cs->coll->hash_sort(cs, (uchar*) key, keylen, &nr1, &nr2); |
317 | return nr1; |
318 | } |
319 | |
#define MAX_LOAD 1.0 /* average number of elements in a bucket */

/* forward declaration - the definition is at the end of the file */
static int initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *);
323 | |
324 | static void default_initializer(LF_HASH *hash, void *dst, const void *src) |
325 | { |
326 | memcpy(dst, src, hash->element_size); |
327 | } |
328 | |
329 | /* |
330 | Initializes lf_hash, the arguments are compatible with hash_init |
331 | |
332 | @note element_size sets both the size of allocated memory block for |
333 | lf_alloc and a size of memcpy'ed block size in lf_hash_insert. Typically |
334 | they are the same, indeed. But LF_HASH::element_size can be decreased |
  after lf_hash_init, and then lf_alloc will allocate a larger block than
  lf_hash_insert will copy over. It is desirable if part of the element
  is expensive to initialize - for example if there is a mutex or
  DYNAMIC_ARRAY. In this case they should be initialized in the
339 | LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them. |
340 | |
341 | The above works well with PODS. For more complex cases (e.g. C++ classes |
342 | with private members) use initializer function. |
343 | */ |
void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
                  uint key_offset, uint key_length, my_hash_get_key get_key,
                  CHARSET_INFO *charset)
{
  /* each allocator block holds the list-node header plus the element data */
  lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
                offsetof(LF_SLIST, key));
  lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));
  hash->size= 1;  /* number of buckets, doubled as the hash grows */
  hash->count= 0; /* number of elements */
  hash->element_size= element_size;
  hash->flags= flags;
  hash->charset= charset ? charset : &my_charset_bin;
  hash->key_offset= key_offset;
  hash->key_length= key_length;
  hash->get_key= get_key;
  hash->initializer= default_initializer;
  hash->hash_function= calc_hash;
  /* either a get_key callback, or a fixed key_offset/key_length - not both */
  DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
}
363 | |
void lf_hash_destroy(LF_HASH *hash)
{
  LF_SLIST *el, **head= (LF_SLIST **)lf_dynarray_value(&hash->array, 0);

  if (head)
  {
    el= *head;
    /* bucket 0 heads the whole list, so this walk frees every node */
    while (el)
    {
      intptr next= el->link;
      if (el->hashnr & 1)
        lf_alloc_direct_free(&hash->alloc, el); /* normal node */
      else
        my_free(el); /* dummy node */
      el= (LF_SLIST *)next;
    }
  }
  lf_alloc_destroy(&hash->alloc);
  lf_dynarray_destroy(&hash->array);
}
384 | |
385 | /* |
386 | DESCRIPTION |
387 | inserts a new element to a hash. it will have a _copy_ of |
388 | data, not a pointer to it. |
389 | |
390 | RETURN |
391 | 0 - inserted |
392 | 1 - didn't (unique key conflict) |
393 | -1 - out of memory |
394 | |
395 | NOTE |
396 | see l_insert() for pin usage notes |
397 | */ |
398 | int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) |
399 | { |
400 | int csize, bucket, hashnr; |
401 | LF_SLIST *node, * volatile *el; |
402 | |
403 | node= (LF_SLIST *)lf_alloc_new(pins); |
404 | if (unlikely(!node)) |
405 | return -1; |
406 | hash->initializer(hash, node + 1, data); |
407 | node->key= hash_key(hash, (uchar *)(node+1), &node->keylen); |
408 | hashnr= hash->hash_function(hash->charset, node->key, node->keylen) & INT_MAX32; |
409 | bucket= hashnr % hash->size; |
410 | el= lf_dynarray_lvalue(&hash->array, bucket); |
411 | if (unlikely(!el)) |
412 | return -1; |
413 | if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) |
414 | return -1; |
415 | node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */ |
416 | if (l_insert(el, hash->charset, node, pins, hash->flags)) |
417 | { |
418 | lf_alloc_free(pins, node); |
419 | return 1; |
420 | } |
421 | csize= hash->size; |
422 | if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD) |
423 | my_atomic_cas32(&hash->size, &csize, csize*2); |
424 | return 0; |
425 | } |
426 | |
427 | /* |
428 | DESCRIPTION |
429 | deletes an element with the given key from the hash (if a hash is |
430 | not unique and there're many elements with this key - the "first" |
431 | matching element is deleted) |
432 | RETURN |
433 | 0 - deleted |
434 | 1 - didn't (not found) |
435 | NOTE |
436 | see l_delete() for pin usage notes |
437 | */ |
int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
{
  LF_SLIST * volatile *el;
  uint bucket, hashnr;

  hashnr= hash->hash_function(hash->charset, (uchar *)key, keylen) & INT_MAX32;

  /* hide OOM errors - if we cannot initialize a bucket, try the previous one */
  for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
  {
    el= lf_dynarray_lvalue(&hash->array, bucket);
    if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
      break;
    if (unlikely(bucket == 0))
      return 1; /* if there's no bucket==0, the hash is empty */
  }
  /* normal nodes have the low bit set in the reversed hash number */
  if (l_delete(el, hash->charset, my_reverse_bits(hashnr) | 1,
               (uchar *)key, keylen, pins))
  {
    return 1;
  }
  my_atomic_add32(&hash->count, -1);
  return 0;
}
462 | |
463 | /* |
464 | RETURN |
465 | a pointer to an element with the given key (if a hash is not unique and |
466 | there're many elements with this key - the "first" matching element) |
467 | NULL if nothing is found |
468 | |
469 | NOTE |
470 | see l_search() for pin usage notes |
471 | */ |
void *lf_hash_search_using_hash_value(LF_HASH *hash, LF_PINS *pins,
                                      my_hash_value_type hashnr,
                                      const void *key, uint keylen)
{
  LF_SLIST * volatile *el, *found;
  uint bucket;

  /* hide OOM errors - if we cannot initialize a bucket, try the previous one */
  for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
  {
    el= lf_dynarray_lvalue(&hash->array, bucket);
    if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
      break;
    if (unlikely(bucket == 0))
      return 0; /* if there's no bucket==0, the hash is empty */
  }
  found= l_search(el, hash->charset, my_reverse_bits(hashnr) | 1,
                  (uchar *)key, keylen, pins);
  /* the user data lives directly after the LF_SLIST header */
  return found ? found+1 : 0;
}
492 | |
493 | |
494 | /** |
495 | Iterate over all elements in hash and call function with the element |
496 | |
497 | @note |
498 | If one of 'action' invocations returns 1 the iteration aborts. |
499 | 'action' might see some elements twice! |
500 | |
501 | @retval 0 ok |
502 | @retval 1 error (action returned 1) |
503 | */ |
int lf_hash_iterate(LF_HASH *hash, LF_PINS *pins,
                    my_hash_walk_action action, void *argument)
{
  CURSOR cursor;
  uint bucket= 0;
  int res;
  LF_SLIST * volatile *el;

  el= lf_dynarray_lvalue(&hash->array, bucket);
  if (unlikely(!el))
    return 0; /* if there's no bucket==0, the hash is empty */
  if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
    return 0; /* if there's no bucket==0, the hash is empty */

  /* scan the whole list from bucket 0; 'argument' rides in the key slot */
  res= l_find(el, 0, 0, (uchar*)argument, 0, &cursor, pins, action);

  lf_unpin(pins, 2);
  lf_unpin(pins, 1);
  lf_unpin(pins, 0);
  return res;
}
525 | |
526 | void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) |
527 | { |
528 | return lf_hash_search_using_hash_value(hash, pins, |
529 | hash->hash_function(hash->charset, |
530 | (uchar*) key, |
531 | keylen) & INT_MAX32, |
532 | key, keylen); |
533 | } |
534 | |
/* all dummy nodes share this empty key; they sort by hashnr alone */
static const uchar *dummy_key= (uchar*)"" ;
536 | |
537 | /* |
538 | RETURN |
539 | 0 - ok |
540 | -1 - out of memory |
541 | */ |
542 | static int initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, |
543 | uint bucket, LF_PINS *pins) |
544 | { |
545 | uint parent= my_clear_highest_bit(bucket); |
546 | LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); |
547 | LF_SLIST **tmp= 0, *cur; |
548 | LF_SLIST * volatile *el= lf_dynarray_lvalue(&hash->array, parent); |
549 | if (unlikely(!el || !dummy)) |
550 | return -1; |
551 | if (*el == NULL && bucket && |
552 | unlikely(initialize_bucket(hash, el, parent, pins))) |
553 | { |
554 | my_free(dummy); |
555 | return -1; |
556 | } |
557 | dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */ |
558 | dummy->key= dummy_key; |
559 | dummy->keylen= 0; |
560 | if ((cur= l_insert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE))) |
561 | { |
562 | my_free(dummy); |
563 | dummy= cur; |
564 | } |
565 | my_atomic_casptr((void **)node, (void **)(char*) &tmp, dummy); |
566 | /* |
567 | note that if the CAS above failed (after l_insert() succeeded), |
568 | it would mean that some other thread has executed l_insert() for |
569 | the same dummy node, its l_insert() failed, it picked up our |
570 | dummy node (in "dummy= cur") and executed the same CAS as above. |
571 | Which means that even if CAS above failed we don't need to retry, |
572 | and we should not free(dummy) - there's no memory leak here |
573 | */ |
574 | return 0; |
575 | } |
576 | |