1 | /************************************************************************************ |
2 | Copyright (C) 2000, 2012 MySQL AB & MySQL Finland AB & TCX DataKonsult AB, |
3 | Monty Program AB, 2016 MariaDB Corporation AB |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Library General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2 of the License, or (at your option) any later version. |
9 | |
10 | This library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Library General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Library General Public |
16 | License along with this library; if not see <http://www.gnu.org/licenses> |
17 | or write to the Free Software Foundation, Inc., |
18 | 51 Franklin St., Fifth Floor, Boston, MA 02110, USA |
19 | |
20 | Part of this code includes code from the PHP project which |
21 | is freely available from http://www.php.net |
22 | *************************************************************************************/ |
23 | |
24 | /* The hash functions used for saving keys */ |
25 | /* One of key_length or key_length_offset must be given */ |
26 | /* Key length of 0 isn't allowed */ |
27 | |
28 | #include <ma_global.h> |
29 | #include <ma_sys.h> |
30 | #include <ma_string.h> |
31 | #include <mariadb_ctype.h> |
32 | #include "ma_hash.h" |
33 | |
34 | #define NO_RECORD ((uint) -1) |
35 | #define LOWFIND 1 |
36 | #define LOWUSED 2 |
37 | #define HIGHFIND 4 |
38 | #define HIGHUSED 8 |
39 | |
40 | static uint hash_mask(uint hashnr,uint buffmax,uint maxlength); |
41 | static void movelink(HASH_LINK *array,uint pos,uint next_link,uint newlink); |
42 | static uint calc_hashnr(const uchar *key,uint length); |
43 | static uint calc_hashnr_caseup(const uchar *key,uint length); |
44 | static int hashcmp(HASH *hash,HASH_LINK *pos,const uchar *key,uint length); |
45 | |
46 | |
47 | my_bool _hash_init(HASH *hash,uint size,uint key_offset,uint key_length, |
48 | hash_get_key get_key, |
49 | void (*free_element)(void*),uint flags CALLER_INFO_PROTO) |
50 | { |
51 | hash->records=0; |
52 | if (ma_init_dynamic_array_ci(&hash->array,sizeof(HASH_LINK),size,0)) |
53 | { |
54 | hash->free=0; /* Allow call to hash_free */ |
55 | return(TRUE); |
56 | } |
57 | hash->key_offset=key_offset; |
58 | hash->key_length=key_length; |
59 | hash->blength=1; |
60 | hash->current_record= NO_RECORD; /* For the future */ |
61 | hash->get_key=get_key; |
62 | hash->free=free_element; |
63 | hash->flags=flags; |
64 | if (flags & HASH_CASE_INSENSITIVE) |
65 | hash->calc_hashnr=calc_hashnr_caseup; |
66 | else |
67 | hash->calc_hashnr=calc_hashnr; |
68 | return(0); |
69 | } |
70 | |
71 | |
72 | void hash_free(HASH *hash) |
73 | { |
74 | if (hash->free) |
75 | { |
76 | uint i,records; |
77 | HASH_LINK *data=dynamic_element(&hash->array,0,HASH_LINK*); |
78 | for (i=0,records=hash->records ; i < records ; i++) |
79 | (*hash->free)(data[i].data); |
80 | hash->free=0; |
81 | } |
82 | ma_delete_dynamic(&hash->array); |
83 | hash->records=0; |
84 | return; |
85 | } |
86 | |
87 | /* some helper functions */ |
88 | |
89 | /* |
90 | This function is char* instead of uchar* as HPUX11 compiler can't |
91 | handle inline functions that are not defined as native types |
92 | */ |
93 | |
94 | static inline char* |
95 | hash_key(HASH *hash,const uchar *record,uint *length,my_bool first) |
96 | { |
97 | if (hash->get_key) |
98 | return (char *)(*hash->get_key)(record,(uint *)length,first); |
99 | *length=hash->key_length; |
100 | return (char*) record+hash->key_offset; |
101 | } |
102 | |
103 | /* Calculate pos according to keys */ |
104 | |
105 | static uint hash_mask(uint hashnr,uint buffmax,uint maxlength) |
106 | { |
107 | if ((hashnr & (buffmax-1)) < maxlength) return (hashnr & (buffmax-1)); |
108 | return (hashnr & ((buffmax >> 1) -1)); |
109 | } |
110 | |
111 | static uint hash_rec_mask(HASH *hash,HASH_LINK *pos,uint buffmax, |
112 | uint maxlength) |
113 | { |
114 | uint length; |
115 | uchar *key= (uchar*) hash_key(hash,pos->data,&length,0); |
116 | return hash_mask((*hash->calc_hashnr)(key,length),buffmax,maxlength); |
117 | } |
118 | |
119 | #ifndef NEW_HASH_FUNCTION |
120 | |
121 | /* Calc hashvalue for a key */ |
122 | |
123 | static uint calc_hashnr(const uchar *key,uint length) |
124 | { |
125 | register uint nr=1, nr2=4; |
126 | while (length--) |
127 | { |
128 | nr^= (((nr & 63)+nr2)*((uint) (uchar) *key++))+ (nr << 8); |
129 | nr2+=3; |
130 | } |
131 | return((uint) nr); |
132 | } |
133 | |
134 | /* Calc hashvalue for a key, case independently */ |
135 | |
136 | static uint calc_hashnr_caseup(const uchar *key,uint length) |
137 | { |
138 | register uint nr=1, nr2=4; |
139 | while (length--) |
140 | { |
141 | nr^= (((nr & 63)+nr2)*((uint) (uchar) toupper(*key++)))+ (nr << 8); |
142 | nr2+=3; |
143 | } |
144 | return((uint) nr); |
145 | } |
146 | |
147 | #else |
148 | |
149 | /* |
150 | * Fowler/Noll/Vo hash |
151 | * |
152 | * The basis of the hash algorithm was taken from an idea sent by email to the |
153 | * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and |
154 | * Glenn Fowler (gsf@research.att.com). Landon Curt Noll (chongo@toad.com) |
155 | * later improved on their algorithm. |
156 | * |
157 | * The magic is in the interesting relationship between the special prime |
158 | * 16777619 (2^24 + 403) and 2^32 and 2^8. |
159 | * |
160 | * This hash produces the fewest collisions of any function that we've seen so |
161 | * far, and works well on both numbers and strings. |
162 | */ |
163 | |
164 | uint calc_hashnr(const uchar *key, uint len) |
165 | { |
166 | const uchar *end=key+len; |
167 | uint hash; |
168 | for (hash = 0; key < end; key++) |
169 | { |
170 | hash *= 16777619; |
171 | hash ^= (uint) *(uchar*) key; |
172 | } |
173 | return (hash); |
174 | } |
175 | |
176 | uint calc_hashnr_caseup(const uchar *key, uint len) |
177 | { |
178 | const uchar *end=key+len; |
179 | uint hash; |
180 | for (hash = 0; key < end; key++) |
181 | { |
182 | hash *= 16777619; |
183 | hash ^= (uint) (uchar) toupper(*key); |
184 | } |
185 | return (hash); |
186 | } |
187 | |
188 | #endif |
189 | |
190 | |
191 | #ifndef __SUNPRO_C /* SUNPRO can't handle this */ |
192 | static inline |
193 | #endif |
194 | unsigned int rec_hashnr(HASH *hash,const uchar *record) |
195 | { |
196 | uint length; |
197 | uchar *key= (uchar*) hash_key(hash,record,&length,0); |
198 | return (*hash->calc_hashnr)(key,length); |
199 | } |
200 | |
201 | |
202 | /* Search after a record based on a key */ |
203 | /* Sets info->current_ptr to found record */ |
204 | |
205 | void* hash_search(HASH *hash,const uchar *key,uint length) |
206 | { |
207 | HASH_LINK *pos; |
208 | uint flag,idx; |
209 | |
210 | flag=1; |
211 | if (hash->records) |
212 | { |
213 | idx=hash_mask((*hash->calc_hashnr)(key,length ? length : |
214 | hash->key_length), |
215 | hash->blength,hash->records); |
216 | do |
217 | { |
218 | pos= dynamic_element(&hash->array,idx,HASH_LINK*); |
219 | if (!hashcmp(hash,pos,key,length)) |
220 | { |
221 | hash->current_record= idx; |
222 | return (pos->data); |
223 | } |
224 | if (flag) |
225 | { |
226 | flag=0; /* Reset flag */ |
227 | if (hash_rec_mask(hash,pos,hash->blength,hash->records) != idx) |
228 | break; /* Wrong link */ |
229 | } |
230 | } |
231 | while ((idx=pos->next) != NO_RECORD); |
232 | } |
233 | hash->current_record= NO_RECORD; |
234 | return(0); |
235 | } |
236 | |
237 | /* Get next record with identical key */ |
238 | /* Can only be called if previous calls was hash_search */ |
239 | |
240 | void *hash_next(HASH *hash,const uchar *key,uint length) |
241 | { |
242 | HASH_LINK *pos; |
243 | uint idx; |
244 | |
245 | if (hash->current_record != NO_RECORD) |
246 | { |
247 | HASH_LINK *data=dynamic_element(&hash->array,0,HASH_LINK*); |
248 | for (idx=data[hash->current_record].next; idx != NO_RECORD ; idx=pos->next) |
249 | { |
250 | pos=data+idx; |
251 | if (!hashcmp(hash,pos,key,length)) |
252 | { |
253 | hash->current_record= idx; |
254 | return pos->data; |
255 | } |
256 | } |
257 | hash->current_record=NO_RECORD; |
258 | } |
259 | return 0; |
260 | } |
261 | |
262 | |
263 | /* Change link from pos to new_link */ |
264 | |
265 | static void movelink(HASH_LINK *array,uint find,uint next_link,uint newlink) |
266 | { |
267 | HASH_LINK *old_link; |
268 | do |
269 | { |
270 | old_link=array+next_link; |
271 | } |
272 | while ((next_link=old_link->next) != find); |
273 | old_link->next= newlink; |
274 | return; |
275 | } |
276 | |
277 | /* Compare a key in a record to a whole key. Return 0 if identical */ |
278 | |
279 | static int hashcmp(HASH *hash,HASH_LINK *pos,const uchar *key,uint length) |
280 | { |
281 | uint rec_keylength; |
282 | uchar *rec_key= (uchar*) hash_key(hash,pos->data,&rec_keylength,1); |
283 | return (length && length != rec_keylength) || |
284 | memcmp(rec_key,key,rec_keylength); |
285 | } |
286 | |
287 | |
288 | /* Write a hash-key to the hash-index */ |
289 | |
290 | my_bool hash_insert(HASH *info,const uchar *record) |
291 | { |
292 | int flag; |
293 | uint halfbuff,hash_nr,first_index,idx; |
294 | uchar *ptr_to_rec= NULL,*ptr_to_rec2= NULL; |
295 | HASH_LINK *data,*empty,*gpos= NULL,*gpos2 = NULL,*pos; |
296 | |
297 | LINT_INIT(gpos); LINT_INIT(gpos2); |
298 | LINT_INIT(ptr_to_rec); LINT_INIT(ptr_to_rec2); |
299 | |
300 | flag=0; |
301 | if (!(empty=(HASH_LINK*) ma_alloc_dynamic(&info->array))) |
302 | return(TRUE); /* No more memory */ |
303 | |
304 | info->current_record= NO_RECORD; |
305 | data=dynamic_element(&info->array,0,HASH_LINK*); |
306 | halfbuff= info->blength >> 1; |
307 | |
308 | idx=first_index=info->records-halfbuff; |
309 | if (idx != info->records) /* If some records */ |
310 | { |
311 | do |
312 | { |
313 | pos=data+idx; |
314 | hash_nr=rec_hashnr(info,pos->data); |
315 | if (flag == 0) /* First loop; Check if ok */ |
316 | if (hash_mask(hash_nr,info->blength,info->records) != first_index) |
317 | break; |
318 | if (!(hash_nr & halfbuff)) |
319 | { /* Key will not move */ |
320 | if (!(flag & LOWFIND)) |
321 | { |
322 | if (flag & HIGHFIND) |
323 | { |
324 | flag=LOWFIND | HIGHFIND; |
325 | /* key shall be moved to the current empty position */ |
326 | gpos=empty; |
327 | ptr_to_rec=pos->data; |
328 | empty=pos; /* This place is now free */ |
329 | } |
330 | else |
331 | { |
332 | flag=LOWFIND | LOWUSED; /* key isn't changed */ |
333 | gpos=pos; |
334 | ptr_to_rec=pos->data; |
335 | } |
336 | } |
337 | else |
338 | { |
339 | if (!(flag & LOWUSED)) |
340 | { |
341 | /* Change link of previous LOW-key */ |
342 | gpos->data=ptr_to_rec; |
343 | gpos->next=(uint) (pos-data); |
344 | flag= (flag & HIGHFIND) | (LOWFIND | LOWUSED); |
345 | } |
346 | gpos=pos; |
347 | ptr_to_rec=pos->data; |
348 | } |
349 | } |
350 | else |
351 | { /* key will be moved */ |
352 | if (!(flag & HIGHFIND)) |
353 | { |
354 | flag= (flag & LOWFIND) | HIGHFIND; |
355 | /* key shall be moved to the last (empty) position */ |
356 | gpos2 = empty; empty=pos; |
357 | ptr_to_rec2=pos->data; |
358 | } |
359 | else |
360 | { |
361 | if (!(flag & HIGHUSED)) |
362 | { |
363 | /* Change link of previous hash-key and save */ |
364 | gpos2->data=ptr_to_rec2; |
365 | gpos2->next=(uint) (pos-data); |
366 | flag= (flag & LOWFIND) | (HIGHFIND | HIGHUSED); |
367 | } |
368 | gpos2=pos; |
369 | ptr_to_rec2=pos->data; |
370 | } |
371 | } |
372 | } |
373 | while ((idx=pos->next) != NO_RECORD); |
374 | |
375 | if ((flag & (LOWFIND | LOWUSED)) == LOWFIND) |
376 | { |
377 | gpos->data=ptr_to_rec; |
378 | gpos->next=NO_RECORD; |
379 | } |
380 | if ((flag & (HIGHFIND | HIGHUSED)) == HIGHFIND) |
381 | { |
382 | gpos2->data=ptr_to_rec2; |
383 | gpos2->next=NO_RECORD; |
384 | } |
385 | } |
386 | /* Check if we are at the empty position */ |
387 | |
388 | idx=hash_mask(rec_hashnr(info,record),info->blength,info->records+1); |
389 | pos=data+idx; |
390 | if (pos == empty) |
391 | { |
392 | pos->data=(uchar*) record; |
393 | pos->next=NO_RECORD; |
394 | } |
395 | else |
396 | { |
397 | /* Check if more records in same hash-nr family */ |
398 | empty[0]=pos[0]; |
399 | gpos=data+hash_rec_mask(info,pos,info->blength,info->records+1); |
400 | if (pos == gpos) |
401 | { |
402 | pos->data=(uchar*) record; |
403 | pos->next=(uint) (empty - data); |
404 | } |
405 | else |
406 | { |
407 | pos->data=(uchar*) record; |
408 | pos->next=NO_RECORD; |
409 | movelink(data,(uint) (pos-data),(uint) (gpos-data),(uint) (empty-data)); |
410 | } |
411 | } |
412 | if (++info->records == info->blength) |
413 | info->blength+= info->blength; |
414 | return(0); |
415 | } |
416 | |
417 | |
418 | /****************************************************************************** |
419 | ** Remove one record from hash-table. The record with the same record |
420 | ** ptr is removed. |
421 | ** if there is a free-function it's called for record if found |
422 | ******************************************************************************/ |
423 | |
424 | my_bool hash_delete(HASH *hash,uchar *record) |
425 | { |
426 | uint blength,pos2,pos_hashnr,lastpos_hashnr,idx,empty_index; |
427 | HASH_LINK *data,*lastpos,*gpos,*pos,*pos3,*empty; |
428 | if (!hash->records) |
429 | return(1); |
430 | |
431 | blength=hash->blength; |
432 | data=dynamic_element(&hash->array,0,HASH_LINK*); |
433 | /* Search after record with key */ |
434 | pos=data+ hash_mask(rec_hashnr(hash,record),blength,hash->records); |
435 | gpos = 0; |
436 | |
437 | while (pos->data != record) |
438 | { |
439 | gpos=pos; |
440 | if (pos->next == NO_RECORD) |
441 | return(1); /* Key not found */ |
442 | pos=data+pos->next; |
443 | } |
444 | |
445 | if ( --(hash->records) < hash->blength >> 1) hash->blength>>=1; |
446 | hash->current_record= NO_RECORD; |
447 | lastpos=data+hash->records; |
448 | |
449 | /* Remove link to record */ |
450 | empty=pos; empty_index=(uint) (empty-data); |
451 | if (gpos) |
452 | gpos->next=pos->next; /* unlink current ptr */ |
453 | else if (pos->next != NO_RECORD) |
454 | { |
455 | empty=data+(empty_index=pos->next); |
456 | pos->data=empty->data; |
457 | pos->next=empty->next; |
458 | } |
459 | |
460 | if (empty == lastpos) /* last key at wrong pos or no next link */ |
461 | goto exit; |
462 | |
463 | /* Move the last key (lastpos) */ |
464 | lastpos_hashnr=rec_hashnr(hash,lastpos->data); |
465 | /* pos is where lastpos should be */ |
466 | pos=data+hash_mask(lastpos_hashnr,hash->blength,hash->records); |
467 | if (pos == empty) /* Move to empty position. */ |
468 | { |
469 | empty[0]=lastpos[0]; |
470 | goto exit; |
471 | } |
472 | pos_hashnr=rec_hashnr(hash,pos->data); |
473 | /* pos3 is where the pos should be */ |
474 | pos3= data+hash_mask(pos_hashnr,hash->blength,hash->records); |
475 | if (pos != pos3) |
476 | { /* pos is on wrong posit */ |
477 | empty[0]=pos[0]; /* Save it here */ |
478 | pos[0]=lastpos[0]; /* This should be here */ |
479 | movelink(data,(uint) (pos-data),(uint) (pos3-data),empty_index); |
480 | goto exit; |
481 | } |
482 | pos2= hash_mask(lastpos_hashnr,blength,hash->records+1); |
483 | if (pos2 == hash_mask(pos_hashnr,blength,hash->records+1)) |
484 | { /* Identical key-positions */ |
485 | if (pos2 != hash->records) |
486 | { |
487 | empty[0]=lastpos[0]; |
488 | movelink(data,(uint) (lastpos-data),(uint) (pos-data),empty_index); |
489 | goto exit; |
490 | } |
491 | idx= (uint) (pos-data); /* Link pos->next after lastpos */ |
492 | } |
493 | else idx= NO_RECORD; /* Different positions merge */ |
494 | |
495 | empty[0]=lastpos[0]; |
496 | movelink(data,idx,empty_index,pos->next); |
497 | pos->next=empty_index; |
498 | |
499 | exit: |
500 | ma_pop_dynamic(&hash->array); |
501 | if (hash->free) |
502 | (*hash->free)((uchar*) record); |
503 | return(0); |
504 | } |
505 | |
506 | /* |
507 | Update keys when record has changed. |
508 | This is much more efficient than using a delete & insert. |
509 | */ |
510 | |
511 | my_bool hash_update(HASH *hash,uchar *record,uchar *old_key,uint old_key_length) |
512 | { |
513 | uint idx,new_index,new_pos_index,blength,records,empty; |
514 | HASH_LINK org_link,*data,*previous,*pos; |
515 | |
516 | data=dynamic_element(&hash->array,0,HASH_LINK*); |
517 | blength=hash->blength; records=hash->records; |
518 | |
519 | /* Search after record with key */ |
520 | |
521 | idx=hash_mask((*hash->calc_hashnr)(old_key,(old_key_length ? |
522 | old_key_length : |
523 | hash->key_length)), |
524 | blength,records); |
525 | new_index=hash_mask(rec_hashnr(hash,record),blength,records); |
526 | if (idx == new_index) |
527 | return(0); /* Nothing to do (No record check) */ |
528 | previous=0; |
529 | for (;;) |
530 | { |
531 | |
532 | if ((pos= data+idx)->data == record) |
533 | break; |
534 | previous=pos; |
535 | if ((idx=pos->next) == NO_RECORD) |
536 | return(1); /* Not found in links */ |
537 | } |
538 | hash->current_record= NO_RECORD; |
539 | org_link= *pos; |
540 | empty=idx; |
541 | |
542 | /* Relink record from current chain */ |
543 | |
544 | if (!previous) |
545 | { |
546 | if (pos->next != NO_RECORD) |
547 | { |
548 | empty=pos->next; |
549 | *pos= data[pos->next]; |
550 | } |
551 | } |
552 | else |
553 | previous->next=pos->next; /* unlink pos */ |
554 | |
555 | /* Move data to correct position */ |
556 | pos=data+new_index; |
557 | new_pos_index=hash_rec_mask(hash,pos,blength,records); |
558 | if (new_index != new_pos_index) |
559 | { /* Other record in wrong position */ |
560 | data[empty] = *pos; |
561 | movelink(data,new_index,new_pos_index,empty); |
562 | org_link.next=NO_RECORD; |
563 | data[new_index]= org_link; |
564 | } |
565 | else |
566 | { /* Link in chain at right position */ |
567 | org_link.next=data[new_index].next; |
568 | data[empty]=org_link; |
569 | data[new_index].next=empty; |
570 | } |
571 | return(0); |
572 | } |
573 | |
574 | |
575 | uchar *hash_element(HASH *hash,uint idx) |
576 | { |
577 | if (idx < hash->records) |
578 | return dynamic_element(&hash->array,idx,HASH_LINK*)->data; |
579 | return 0; |
580 | } |
581 | |
582 | |
583 | |
584 | |