1 | /* This may look like C code, but it is really -*- C++ -*- */ |
2 | |
3 | /* Search algorithm. |
4 | |
5 | Copyright (C) 1989-1998, 2000, 2002, 2009 Free Software Foundation, Inc. |
6 | Written by Douglas C. Schmidt <schmidt@ics.uci.edu> |
7 | and Bruno Haible <bruno@clisp.org>. |
8 | |
9 | This file is part of GNU GPERF. |
10 | |
11 | This program is free software: you can redistribute it and/or modify |
12 | it under the terms of the GNU General Public License as published by |
13 | the Free Software Foundation; either version 3 of the License, or |
14 | (at your option) any later version. |
15 | |
16 | This program is distributed in the hope that it will be useful, |
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | GNU General Public License for more details. |
20 | |
21 | You should have received a copy of the GNU General Public License |
22 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
23 | |
24 | #ifndef search_h |
25 | #define search_h 1 |
26 | |
27 | #include "keyword-list.h" |
28 | #include "positions.h" |
29 | #include "bool-array.h" |
30 | |
31 | struct EquivalenceClass; |
32 | |
33 | class Search |
34 | { |
35 | public: |
36 | Search (KeywordExt_List *list); |
37 | ~Search (); |
38 | void optimize (); |
39 | private: |
40 | void prepare (); |
41 | |
42 | /* Computes the upper bound on the indices passed to asso_values[], |
43 | assuming no alpha_increments. */ |
44 | unsigned int compute_alpha_size () const; |
45 | |
46 | /* Computes the unification rules between different asso_values[c], |
47 | assuming no alpha_increments. */ |
48 | unsigned int * compute_alpha_unify () const; |
49 | |
50 | /* Initializes each keyword's _selchars array. */ |
51 | void init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const; |
52 | /* Deletes each keyword's _selchars array. */ |
53 | void delete_selchars () const; |
54 | |
55 | /* Count the duplicate keywords that occur with a given set of positions. */ |
56 | unsigned int count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const; |
57 | |
58 | /* Find good key positions. */ |
59 | void find_positions (); |
60 | |
61 | /* Count the duplicate keywords that occur with the found set of positions. */ |
62 | unsigned int count_duplicates_tuple () const; |
63 | |
64 | /* Computes the upper bound on the indices passed to asso_values[]. */ |
65 | unsigned int compute_alpha_size (const unsigned int *alpha_inc) const; |
66 | |
67 | /* Computes the unification rules between different asso_values[c]. */ |
68 | unsigned int * compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const; |
69 | |
70 | /* Initializes each keyword's _selchars array. */ |
71 | void init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const; |
72 | |
73 | /* Count the duplicate keywords that occur with the given set of positions |
74 | and a given alpha_inc[] array. */ |
75 | unsigned int count_duplicates_multiset (const unsigned int *alpha_inc) const; |
76 | |
77 | /* Find good _alpha_inc[]. */ |
78 | void find_alpha_inc (); |
79 | |
80 | /* Initializes the asso_values[] related parameters. */ |
81 | void prepare_asso_values (); |
82 | |
83 | EquivalenceClass * compute_partition (bool *undetermined) const; |
84 | |
85 | unsigned int count_possible_collisions (EquivalenceClass *partition, unsigned int c) const; |
86 | |
87 | bool unchanged_partition (EquivalenceClass *partition, unsigned int c) const; |
88 | |
89 | /* Finds some _asso_values[] that fit. */ |
90 | void find_asso_values (); |
91 | |
92 | /* Computes a keyword's hash value, relative to the current _asso_values[], |
93 | and stores it in keyword->_hash_value. */ |
94 | int compute_hash (KeywordExt *keyword) const; |
95 | |
96 | /* Finds good _asso_values[]. */ |
97 | void find_good_asso_values (); |
98 | |
99 | /* Sorts the keyword list by hash value. */ |
100 | void sort (); |
101 | |
102 | public: |
103 | |
104 | /* Linked list of keywords. */ |
105 | KeywordExt_List * _head; |
106 | |
107 | /* Total number of keywords, counting duplicates. */ |
108 | int _total_keys; |
109 | |
110 | /* Maximum length of the longest keyword. */ |
111 | int _max_key_len; |
112 | |
113 | /* Minimum length of the shortest keyword. */ |
114 | int _min_key_len; |
115 | |
116 | /* Whether the hash function includes the length. */ |
117 | bool _hash_includes_len; |
118 | |
119 | /* User-specified or computed key positions. */ |
120 | Positions _key_positions; |
121 | |
122 | /* Adjustments to add to bytes add specific key positions. */ |
123 | unsigned int * _alpha_inc; |
124 | |
125 | /* Size of alphabet. */ |
126 | unsigned int _alpha_size; |
127 | |
128 | /* Alphabet character unification, either the identity or a mapping from |
129 | upper case characters to lower case characters (and maybe more). */ |
130 | unsigned int * _alpha_unify; |
131 | |
132 | /* Maximum _selchars_length over all keywords. */ |
133 | unsigned int _max_selchars_length; |
134 | |
135 | /* Total number of duplicates that have been moved to _duplicate_link lists |
136 | (not counting their representatives which stay on the main list). */ |
137 | int _total_duplicates; |
138 | |
139 | /* Counts occurrences of each key set character. |
140 | _occurrences[c] is the number of times that c occurs among the _selchars |
141 | of a keyword. */ |
142 | int * _occurrences; |
143 | /* Value associated with each character. */ |
144 | int * _asso_values; |
145 | |
146 | private: |
147 | |
148 | /* Length of _head list. Number of keywords, not counting duplicates. */ |
149 | int _list_len; |
150 | |
151 | /* Exclusive upper bound for every _asso_values[c]. A power of 2. */ |
152 | unsigned int _asso_value_max; |
153 | |
154 | /* Initial value for asso_values table. -1 means random. */ |
155 | int _initial_asso_value; |
156 | /* Jump length when trying alternative values. 0 means random. */ |
157 | int _jump; |
158 | |
159 | /* Maximal possible hash value. */ |
160 | int _max_hash_value; |
161 | |
162 | /* Sparse bit vector for collision detection. */ |
163 | Bool_Array * _collision_detector; |
164 | }; |
165 | |
166 | #endif |
167 | |