/* QQ: TODO multi-pinbox */
/* Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */

/*
  wait-free concurrent allocator based on pinning addresses

  It works as follows: every thread (strictly speaking - every CPU, but
  that would be too difficult to implement) has a small array of pointers,
  called "pins". Before using an object its address must be stored in this
  array (pinned). When an object is no longer necessary its address must
  be removed from this array (unpinned). When a thread wants to free() an
  object it scans all pins of all threads to see if somebody has this
  object pinned. If yes - the object is not freed (but stored in a
  "purgatory"). To reduce the cost of a single free(), pins are not
  scanned on every free(): objects are only added to the (thread-local)
  purgatory. On every LF_PURGATORY_SIZE-th free() the purgatory is scanned
  and all unpinned objects are freed.

  Pins are used to solve the ABA problem. To use pins one must obey
  a pinning protocol:

   1. Let's assume that PTR is a shared pointer to an object. Shared means
      that any thread may modify it at any time to point to a different
      object and free the old object. Later the freed object may be
      allocated by another thread. If we're unlucky, that other thread may
      set PTR to point to this object again. This is the ABA problem.
   2. Create a local pointer LOCAL_PTR.
   3. Pin the PTR in a loop:
      do
      {
        LOCAL_PTR= PTR;
        pin(PTR, PIN_NUMBER);
      } while (LOCAL_PTR != PTR)
   4. It is guaranteed that after the loop has ended, LOCAL_PTR
      points to an object (or NULL, if PTR may be NULL) that
      will never be freed. It is not guaranteed though
      that LOCAL_PTR == PTR (as PTR can change at any time).
   5. When done working with the object, remove the pin:
      unpin(PIN_NUMBER)
   6. When copying pins (as in the list traversing loop:
        pin(CUR, 1);
        while ()
        {
          do                            // standard
          {                             //  pinning
            NEXT=CUR->next;             //   loop
            pin(NEXT, 0);               //    see #3
          } while (NEXT != CUR->next);  //     above
          ...
          ...
          CUR=NEXT;
          pin(CUR, 1);                  // copy pin[0] to pin[1]
        }
      which keeps the CUR address constantly pinned), note that pins may
      be copied only upwards (!!!), that is from pin[N] to pin[M], M > N.
   7. Don't keep an object pinned longer than necessary - the number of
      pins you have is limited (and small); keeping an object pinned
      prevents its reuse and causes unnecessary mallocs.
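
  Putting steps 1-5 together, a minimal sketch of safely dereferencing a
  shared pointer HEAD (HEAD, the 'data' member and pin slot 0 are
  illustrative names, not part of the API):

      do
      {
        LOCAL= HEAD;
        pin(HEAD, 0);
      } while (LOCAL != HEAD);
      if (LOCAL)
        use(LOCAL->data);    // safe: LOCAL cannot be freed while pinned
      unpin(0);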

  Explanations:

   3. The loop is important. The following can occur:
        thread1> LOCAL_PTR= PTR
        thread2> free(PTR); PTR=0;
        thread1> pin(PTR, PIN_NUMBER);
      now thread1 cannot access LOCAL_PTR, even if it's pinned,
      because it points to freed memory. That is, it *must*
      verify that it has indeed pinned PTR, the shared pointer.

   6. When a thread wants to free some LOCAL_PTR, and it scans
      all lists of pins to see whether it's pinned, it does so
      upwards, from low pin numbers to high. Thus another thread
      must copy an address from one pin to another in the same
      direction - upwards, otherwise the scanning thread may
      miss it.
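      For example, if the copy went downwards, pin[1] to pin[0], the
      following interleaving would lose a pinned address:
        thread1> reads pin[0]               (empty)
        thread2> pin[0]= pin[1]; unpin(1);
        thread1> reads pin[1]               (empty)
      thread1 then frees the object although thread2 had it pinned
      during the whole scan.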

  Implementation details:

  Pins are given away from a "pinbox". The pinbox is a stack-based
  allocator: it uses a dynarray for storing pins, new elements are
  allocated by the dynarray as necessary, and old ones are pushed onto
  a stack for reuse. ABA is solved by versioning the pointer - because
  we use an array, a "pointer" to pins needs only 16 bits, and the upper
  16 bits are used for a version number.

  It is assumed that pins belong to a THD and are not transferable
  between THD's (LF_PINS::stack_ends_here being a primary reason
  for this limitation).
*/
#include <my_global.h>
#include <my_sys.h>
#include <lf.h>

/*
  when using alloca() leave at least this many bytes of the stack free -
  for the functions we might be calling from within this stack frame
*/
#define ALLOCA_SAFETY_MARGIN 8192

#define LF_PINBOX_MAX_PINS 65536

static void lf_pinbox_real_free(LF_PINS *pins);

/*
  Initialize a pinbox. Normally called from lf_alloc_init.
  See the latter for details.
*/
void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset,
                    lf_pinbox_free_func *free_func, void *free_func_arg)
{
  DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0);
  lf_dynarray_init(&pinbox->pinarray, sizeof(LF_PINS));
  pinbox->pinstack_top_ver= 0;
  pinbox->pins_in_array= 0;
  pinbox->free_ptr_offset= free_ptr_offset;
  pinbox->free_func= free_func;
  pinbox->free_func_arg= free_func_arg;
}

void lf_pinbox_destroy(LF_PINBOX *pinbox)
{
  lf_dynarray_destroy(&pinbox->pinarray);
}

/*
  Get pins from a pinbox. Usually called via lf_alloc_get_pins() or
  lf_hash_get_pins().

  SYNOPSIS
    pinbox      the pinbox to get pins from

  DESCRIPTION
    get a new LF_PINS structure from a stack of unused pins,
    or allocate a new one out of the dynarray.

  NOTE
    It is assumed that pins belong to a thread and are not transferable
    between threads.
*/
LF_PINS *lf_pinbox_get_pins(LF_PINBOX *pinbox)
{
  struct st_my_thread_var *var;
  uint32 pins, next, top_ver;
  LF_PINS *el;
  /*
    We have an array of max. 64k elements.
    The highest index currently allocated is pinbox->pins_in_array.
    Freed elements are in a lifo stack, pinstack_top_ver.
    pinstack_top_ver is 32 bits; the 16 low bits are the index in the
    array of the first element of the list. The 16 high bits are a
    version (every time the 16 low bits are updated, the 16 high bits
    are incremented). Versioning prevents the ABA problem.
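
    For example (LF_PINBOX_MAX_PINS == 65536 == 1<<16):
      index=   top_ver % LF_PINBOX_MAX_PINS;   // low 16 bits
      version= top_ver / LF_PINBOX_MAX_PINS;   // high 16 bits
    The CAS below replaces the low 16 bits with the new top index and
    adds LF_PINBOX_MAX_PINS, thus incrementing the version.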
  */
  top_ver= pinbox->pinstack_top_ver;
  do
  {
    if (!(pins= top_ver % LF_PINBOX_MAX_PINS))
    {
      /* the stack of free elements is empty */
      pins= my_atomic_add32((int32 volatile*) &pinbox->pins_in_array, 1)+1;
      if (unlikely(pins >= LF_PINBOX_MAX_PINS))
        return 0;
      /*
        note that the first allocated element has index 1 (pins==1).
        index 0 is reserved to mean "NULL pointer"
      */
      el= (LF_PINS *)lf_dynarray_lvalue(&pinbox->pinarray, pins);
      if (unlikely(!el))
        return 0;
      break;
    }
    el= (LF_PINS *)lf_dynarray_value(&pinbox->pinarray, pins);
    next= el->link;
  } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver,
                            (int32*) &top_ver,
                            top_ver-pins+next+LF_PINBOX_MAX_PINS));
  /*
    set el->link to the index of el in the dynarray (el->link has two uses:
    - if the element is allocated, it's its own index
    - if the element is free, it's the index of the next element in the
      free stack)
  */
  el->link= pins;
  el->purgatory_count= 0;
  el->pinbox= pinbox;
  var= my_thread_var;
  /*
    Threads that do not call my_thread_init() should still be
    able to use the LF_HASH.
  */
  el->stack_ends_here= (var ? & var->stack_ends_here : NULL);
  return el;
}

/*
  Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or
  lf_hash_put_pins().

  DESCRIPTION
    empty the purgatory (XXX deadlock warning below!),
    push the LF_PINS structure onto the stack of unused pins
*/
void lf_pinbox_put_pins(LF_PINS *pins)
{
  LF_PINBOX *pinbox= pins->pinbox;
  uint32 top_ver, nr;
  nr= pins->link;

#ifndef DBUG_OFF
  {
    /* This thread should not hold any pin. */
    int i;
    for (i= 0; i < LF_PINBOX_PINS; i++)
      DBUG_ASSERT(pins->pin[i] == 0);
  }
#endif /* DBUG_OFF */

  /*
    XXX this will deadlock if other threads wait for
    the caller to do something after lf_pinbox_put_pins(),
    while they hold pinned addresses that the caller wants to free.
    Thus: only free pins when all work is done and nobody can wait
    for you!!!
  */
  while (pins->purgatory_count)
  {
    lf_pinbox_real_free(pins);
    if (pins->purgatory_count)
      pthread_yield();
  }
  top_ver= pinbox->pinstack_top_ver;
  do
  {
    pins->link= top_ver % LF_PINBOX_MAX_PINS;
  } while (!my_atomic_cas32((int32 volatile*) &pinbox->pinstack_top_ver,
                            (int32*) &top_ver,
                            top_ver-pins->link+nr+LF_PINBOX_MAX_PINS));
  return;
}

static int ptr_cmp(void **a, void **b)
{
  return *a < *b ? -1 : *a == *b ? 0 : 1;
}
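
/*
  Link ADDR into the PINS->purgatory list: the sizeof(void *) bytes at
  free_ptr_offset inside the object are used to store the link to the
  next element.
*/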
#define add_to_purgatory(PINS, ADDR)                                    \
  do                                                                    \
  {                                                                     \
    *(void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset)=         \
      (PINS)->purgatory;                                                \
    (PINS)->purgatory= (ADDR);                                          \
    (PINS)->purgatory_count++;                                          \
  } while (0)

/*
  Free an object allocated via the pinbox allocator

  DESCRIPTION
    add an object to the purgatory. if necessary, call
    lf_pinbox_real_free() to actually free something.
*/
void lf_pinbox_free(LF_PINS *pins, void *addr)
{
  add_to_purgatory(pins, addr);
  if (pins->purgatory_count % LF_PURGATORY_SIZE == 0)
    lf_pinbox_real_free(pins);
}

struct st_harvester {
  void **granary;
  int npins;
};

/*
  callback for lf_dynarray_iterate:
  scan all pins of all threads and accumulate all pinned addresses
*/
static int harvest_pins(LF_PINS *el, struct st_harvester *hv)
{
  int i;
  LF_PINS *el_end= el+MY_MIN(hv->npins, LF_DYNARRAY_LEVEL_LENGTH);
  for (; el < el_end; el++)
  {
    for (i= 0; i < LF_PINBOX_PINS; i++)
    {
      void *p= el->pin[i];
      if (p)
        *hv->granary++= p;
    }
  }
  /*
    hv->npins may become negative below, but it means that
    we're on the last dynarray page and harvest_pins() won't be
    called again. We don't bother to make hv->npins correct
    (that is 0) in this case.
  */
  hv->npins-= LF_DYNARRAY_LEVEL_LENGTH;
  return 0;
}

/*
  callback for lf_dynarray_iterate:
  scan all pins of all threads and see if addr is present there
*/
static int match_pins(LF_PINS *el, void *addr)
{
  int i;
  LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH;
  for (; el < el_end; el++)
    for (i= 0; i < LF_PINBOX_PINS; i++)
      if (el->pin[i] == addr)
        return 1;
  return 0;
}
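
/*
  helpers to access the list link stored at free_ptr_offset inside an
  object. anext_node() relies on a variable named 'allocator' being in
  scope at the point of use.
*/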
#define next_node(P, X) (*((uchar * volatile *)(((uchar *)(X)) + (P)->free_ptr_offset)))
#define anext_node(X) next_node(&allocator->pinbox, (X))

/*
  Scan the purgatory and free everything that can be freed
*/
static void lf_pinbox_real_free(LF_PINS *pins)
{
  int npins;
  void *list;
  void **addr= NULL;
  void *first= NULL, *last= NULL;
  LF_PINBOX *pinbox= pins->pinbox;

  npins= pinbox->pins_in_array+1;

#ifdef HAVE_ALLOCA
  if (pins->stack_ends_here != NULL)
  {
    int alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins;
    /* create a sorted list of pinned addresses, to speed up searches */
    if (available_stack_size(&pinbox, *pins->stack_ends_here) >
        alloca_size + ALLOCA_SAFETY_MARGIN)
    {
      struct st_harvester hv;
      addr= (void **) alloca(alloca_size);
      hv.granary= addr;
      hv.npins= npins;
      /* scan the dynarray and accumulate all pinned addresses */
      lf_dynarray_iterate(&pinbox->pinarray,
                          (lf_dynarray_func)harvest_pins, &hv);

      npins= (int)(hv.granary-addr);
      /* and sort them */
      if (npins)
        qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp);
    }
  }
#endif

  list= pins->purgatory;
  pins->purgatory= 0;
  pins->purgatory_count= 0;
  while (list)
  {
    void *cur= list;
    list= *(void **)((char *)cur+pinbox->free_ptr_offset);
    if (npins)
    {
      if (addr) /* use binary search */
      {
        void **a, **b, **c;
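        /*
          narrow [a,b] down to two neighbouring elements; if cur is
          pinned at all, it must then be *a or *b
        */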
        for (a= addr, b= addr+npins-1, c= a+(b-a)/2; (b-a) > 1; c= a+(b-a)/2)
          if (cur == *c)
            a= b= c;
          else if (cur > *c)
            a= c;
          else
            b= c;
        if (cur == *a || cur == *b)
          goto found;
      }
      else /* no alloca - no cookie. linear search here */
      {
        if (lf_dynarray_iterate(&pinbox->pinarray,
                                (lf_dynarray_func)match_pins, cur))
          goto found;
      }
    }
    /* not pinned - freeing */
    if (last)
      last= next_node(pinbox, last)= (uchar *)cur;
    else
      first= last= (uchar *)cur;
    continue;
found:
    /* pinned - keeping */
    add_to_purgatory(pins, cur);
  }
  if (last)
    pinbox->free_func(first, last, pinbox->free_func_arg);
}

/* lock-free memory allocator for fixed-size objects */

/*
  callback for lf_pinbox_real_free() to free a list of unpinned objects -
  add it back to the allocator stack

  DESCRIPTION
    'first' and 'last' are the ends of the linked list of nodes:
    first->el->el->....->el->last. Use first==last to free only one element.
*/
static void alloc_free(uchar *first,
                       uchar volatile *last,
                       LF_ALLOCATOR *allocator)
{
  /*
    we need a union here to access a type-punned pointer reliably.
    otherwise gcc -fstrict-aliasing will not see 'tmp' changed in the loop
  */
  union { uchar * node; void *ptr; } tmp;
  tmp.node= allocator->top;
  do
  {
    anext_node(last)= tmp.node;
  } while (!my_atomic_casptr((void **)(char *)&allocator->top,
                             (void **)&tmp.ptr, first) && LF_BACKOFF());
}

/*
  initialize the lock-free allocator

  SYNOPSIS
    allocator         the allocator to initialize
    size              a size of an object to allocate
    free_ptr_offset   an offset inside the object to a sizeof(void *)
                      memory that is guaranteed to be unused after
                      the object is put in the purgatory. Unused by ANY
                      thread, not only the purgatory owner.
                      This memory will be used to link waiting-to-be-freed
                      objects in a purgatory list.
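
    For example, with a node type like
      typedef struct {
        void *key;
        void *free_link;    // unused once the node is in the purgatory
      } MY_NODE;
    one would call
      lf_alloc_init(&alloc, sizeof(MY_NODE), offsetof(MY_NODE, free_link));
    (MY_NODE and free_link are illustrative names, not part of the API)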
*/
void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset)
{
  lf_pinbox_init(&allocator->pinbox, free_ptr_offset,
                 (lf_pinbox_free_func *)alloc_free, allocator);
  allocator->top= 0;
  allocator->mallocs= 0;
  allocator->element_size= size;
  allocator->constructor= 0;
  allocator->destructor= 0;
  DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset);
}

/*
  destroy the allocator, free everything that's in it

  NOTE
    Like every other init/destroy function here and elsewhere, it
    is not thread safe. No, this function is no different; ensure
    that no thread needs the allocator before destroying it.
    We are not responsible for any damage that may be caused by
    accessing the allocator when it is being or has been destroyed.
    Oh yes, and don't put your cat in a microwave.
*/
void lf_alloc_destroy(LF_ALLOCATOR *allocator)
{
  uchar *node= allocator->top;
  while (node)
  {
    uchar *tmp= anext_node(node);
    if (allocator->destructor)
      allocator->destructor(node);
    my_free(node);
    node= tmp;
  }
  lf_pinbox_destroy(&allocator->pinbox);
  allocator->top= 0;
}

/*
  Allocate and return a new object.

  DESCRIPTION
    Pop an unused object from the stack, or malloc one if the stack is
    empty. pin[0] is used; it is unpinned on return.
*/
void *lf_alloc_new(LF_PINS *pins)
{
  LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg);
  uchar *node;
  for (;;)
  {
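    /* standard pinning loop - see step 3 of the pinning protocol above */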
    do
    {
      node= allocator->top;
      lf_pin(pins, 0, node);
    } while (node != allocator->top && LF_BACKOFF());
    if (!node)
    {
      node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
      /* don't call the constructor if my_malloc() failed */
      if (likely(node != 0) && allocator->constructor)
        allocator->constructor(node);
#ifdef MY_LF_EXTRA_DEBUG
      if (likely(node != 0))
        my_atomic_add32(&allocator->mallocs, 1);
#endif
      break;
    }
    if (my_atomic_casptr((void **)(char *)&allocator->top,
                         (void *)&node, anext_node(node)))
      break;
  }
  lf_unpin(pins, 0);
  return node;
}

/*
  count the number of objects in a pool.

  NOTE
    This is NOT thread-safe !!!
*/
uint lf_alloc_pool_count(LF_ALLOCATOR *allocator)
{
  uint i;
  uchar *node;
  for (node= allocator->top, i= 0; node; node= anext_node(node), i++)
    /* no op */;
  return i;
}
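
/*
  A minimal usage sketch (error handling omitted; lf_alloc_get_pins() and
  lf_alloc_put_pins() are the wrappers mentioned above, and MY_NODE with
  its free_link member is the illustrative type from the lf_alloc_init()
  comment):

    LF_ALLOCATOR alloc;
    LF_PINS *pins;
    MY_NODE *node;

    lf_alloc_init(&alloc, sizeof(MY_NODE), offsetof(MY_NODE, free_link));
    pins= lf_alloc_get_pins(&alloc);       // once per thread
    node= (MY_NODE *)lf_alloc_new(pins);   // pop from stack, or malloc
    ...                                    // publish and use the node
    lf_pinbox_free(pins, node);            // to purgatory; actually freed
                                           // once nobody has it pinned
    lf_alloc_put_pins(pins);               // empties the purgatory
    lf_alloc_destroy(&alloc);
*/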