1 | /* Hash table for checking keyword links. Implemented using double hashing. |
2 | Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. |
3 | Written by Douglas C. Schmidt <schmidt@ics.uci.edu> |
4 | and Bruno Haible <bruno@clisp.org>. |
5 | |
6 | This file is part of GNU GPERF. |
7 | |
8 | This program is free software: you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License as published by |
10 | the Free Software Foundation; either version 3 of the License, or |
11 | (at your option) any later version. |
12 | |
13 | This program is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
20 | |
21 | /* Specification. */ |
22 | #include "hash-table.h" |
23 | |
24 | #include <stdio.h> |
25 | #include <string.h> /* declares memset(), strcmp() */ |
26 | #include <hash.h> |
27 | #include "options.h" |
28 | |
29 | /* We use a hash table with double hashing. This is the simplest kind of |
30 | hash table, given that we always only insert and never remove entries |
31 | from the hash table. */ |
32 | |
33 | /* To make double hashing efficient, there need to be enough spare entries. */ |
34 | static const int size_factor = 10; |
35 | |
36 | /* We make the size of the hash table a power of 2. This allows for two |
37 | optimizations: It eliminates the modulo instruction, and allows for an |
38 | easy secondary hashing function. */ |
39 | |
40 | /* Constructor. */ |
41 | Hash_Table::Hash_Table (unsigned int size, bool ignore_length) |
42 | : _ignore_length (ignore_length), |
43 | _collisions (0) |
44 | { |
45 | /* There need to be enough spare entries. */ |
46 | size = size * size_factor; |
47 | |
48 | /* Find smallest power of 2 that is >= size. */ |
49 | unsigned int shift = 0; |
50 | if ((size >> 16) > 0) |
51 | { |
52 | size = size >> 16; |
53 | shift += 16; |
54 | } |
55 | if ((size >> 8) > 0) |
56 | { |
57 | size = size >> 8; |
58 | shift += 8; |
59 | } |
60 | if ((size >> 4) > 0) |
61 | { |
62 | size = size >> 4; |
63 | shift += 4; |
64 | } |
65 | if ((size >> 2) > 0) |
66 | { |
67 | size = size >> 2; |
68 | shift += 2; |
69 | } |
70 | if ((size >> 1) > 0) |
71 | { |
72 | size = size >> 1; |
73 | shift += 1; |
74 | } |
75 | _log_size = shift; |
76 | _size = 1 << shift; |
77 | |
78 | /* Allocate table. */ |
79 | _table = new KeywordExt*[_size]; |
80 | memset (_table, 0, _size * sizeof (*_table)); |
81 | } |
82 | |
83 | /* Destructor. */ |
84 | Hash_Table::~Hash_Table () |
85 | { |
86 | delete[] _table; |
87 | } |
88 | |
89 | /* Print the table's contents. */ |
90 | void |
91 | Hash_Table::dump () const |
92 | { |
93 | int field_width; |
94 | |
95 | field_width = 0; |
96 | { |
97 | for (int i = _size - 1; i >= 0; i--) |
98 | if (_table[i]) |
99 | if (field_width < _table[i]->_selchars_length) |
100 | field_width = _table[i]->_selchars_length; |
101 | } |
102 | |
103 | fprintf (stderr, |
104 | "\ndumping the hash table\n" |
105 | "total available table slots = %d, total bytes = %d, total collisions = %d\n" |
106 | "location, %*s, keyword\n" , |
107 | _size, _size * static_cast<unsigned int>(sizeof (*_table)), |
108 | _collisions, field_width, "keysig" ); |
109 | |
110 | for (int i = _size - 1; i >= 0; i--) |
111 | if (_table[i]) |
112 | { |
113 | fprintf (stderr, "%8d, " , i); |
114 | if (field_width > _table[i]->_selchars_length) |
115 | fprintf (stderr, "%*s" , field_width - _table[i]->_selchars_length, "" ); |
116 | for (int j = 0; j < _table[i]->_selchars_length; j++) |
117 | putc (_table[i]->_selchars[j], stderr); |
118 | fprintf (stderr, ", %.*s\n" , |
119 | _table[i]->_allchars_length, _table[i]->_allchars); |
120 | } |
121 | |
122 | fprintf (stderr, "\nend dumping hash table\n\n" ); |
123 | } |
124 | |
125 | /* Compares two items. */ |
126 | inline bool |
127 | Hash_Table::equal (KeywordExt *item1, KeywordExt *item2) const |
128 | { |
129 | return item1->_selchars_length == item2->_selchars_length |
130 | && memcmp (item1->_selchars, item2->_selchars, |
131 | item2->_selchars_length * sizeof (unsigned int)) |
132 | == 0 |
133 | && (_ignore_length |
134 | || item1->_allchars_length == item2->_allchars_length); |
135 | } |
136 | |
137 | /* Attempts to insert ITEM in the table. If there is already an equal |
138 | entry in it, returns it. Otherwise inserts ITEM and returns NULL. */ |
139 | KeywordExt * |
140 | Hash_Table::insert (KeywordExt *item) |
141 | { |
142 | unsigned hash_val = |
143 | hashpjw (reinterpret_cast<const unsigned char *>(item->_selchars), |
144 | item->_selchars_length * sizeof (unsigned int)); |
145 | unsigned int probe = hash_val & (_size - 1); |
146 | unsigned int increment = |
147 | (((hash_val >> _log_size) |
148 | ^ (_ignore_length ? 0 : item->_allchars_length)) |
149 | << 1) + 1; |
150 | /* Note that because _size is a power of 2 and increment is odd, |
151 | we have gcd(increment,_size) = 1, which guarantees that we'll find |
152 | an empty entry during the loop. */ |
153 | |
154 | while (_table[probe] != NULL) |
155 | { |
156 | if (equal (_table[probe], item)) |
157 | return _table[probe]; |
158 | |
159 | _collisions++; |
160 | probe = (probe + increment) & (_size - 1); |
161 | } |
162 | |
163 | _table[probe] = item; |
164 | return NULL; |
165 | } |
166 | |