1/* This may look like C code, but it is really -*- C++ -*- */
2
3/* Search algorithm.
4
5 Copyright (C) 1989-1998, 2000, 2002, 2009 Free Software Foundation, Inc.
6 Written by Douglas C. Schmidt <schmidt@ics.uci.edu>
7 and Bruno Haible <bruno@clisp.org>.
8
9 This file is part of GNU GPERF.
10
11 This program is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23
24#ifndef search_h
25#define search_h 1
26
27#include "keyword-list.h"
28#include "positions.h"
29#include "bool-array.h"
30
31struct EquivalenceClass;
32
33class Search
34{
35public:
36 Search (KeywordExt_List *list);
37 ~Search ();
38 void optimize ();
39private:
40 void prepare ();
41
42 /* Computes the upper bound on the indices passed to asso_values[],
43 assuming no alpha_increments. */
44 unsigned int compute_alpha_size () const;
45
46 /* Computes the unification rules between different asso_values[c],
47 assuming no alpha_increments. */
48 unsigned int * compute_alpha_unify () const;
49
50 /* Initializes each keyword's _selchars array. */
51 void init_selchars_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
52 /* Deletes each keyword's _selchars array. */
53 void delete_selchars () const;
54
55 /* Count the duplicate keywords that occur with a given set of positions. */
56 unsigned int count_duplicates_tuple (const Positions& positions, const unsigned int *alpha_unify) const;
57
58 /* Find good key positions. */
59 void find_positions ();
60
61 /* Count the duplicate keywords that occur with the found set of positions. */
62 unsigned int count_duplicates_tuple () const;
63
64 /* Computes the upper bound on the indices passed to asso_values[]. */
65 unsigned int compute_alpha_size (const unsigned int *alpha_inc) const;
66
67 /* Computes the unification rules between different asso_values[c]. */
68 unsigned int * compute_alpha_unify (const Positions& positions, const unsigned int *alpha_inc) const;
69
70 /* Initializes each keyword's _selchars array. */
71 void init_selchars_multiset (const Positions& positions, const unsigned int *alpha_unify, const unsigned int *alpha_inc) const;
72
73 /* Count the duplicate keywords that occur with the given set of positions
74 and a given alpha_inc[] array. */
75 unsigned int count_duplicates_multiset (const unsigned int *alpha_inc) const;
76
77 /* Find good _alpha_inc[]. */
78 void find_alpha_inc ();
79
80 /* Initializes the asso_values[] related parameters. */
81 void prepare_asso_values ();
82
83 EquivalenceClass * compute_partition (bool *undetermined) const;
84
85 unsigned int count_possible_collisions (EquivalenceClass *partition, unsigned int c) const;
86
87 bool unchanged_partition (EquivalenceClass *partition, unsigned int c) const;
88
89 /* Finds some _asso_values[] that fit. */
90 void find_asso_values ();
91
92 /* Computes a keyword's hash value, relative to the current _asso_values[],
93 and stores it in keyword->_hash_value. */
94 int compute_hash (KeywordExt *keyword) const;
95
96 /* Finds good _asso_values[]. */
97 void find_good_asso_values ();
98
99 /* Sorts the keyword list by hash value. */
100 void sort ();
101
102public:
103
104 /* Linked list of keywords. */
105 KeywordExt_List * _head;
106
107 /* Total number of keywords, counting duplicates. */
108 int _total_keys;
109
110 /* Maximum length of the longest keyword. */
111 int _max_key_len;
112
113 /* Minimum length of the shortest keyword. */
114 int _min_key_len;
115
116 /* Whether the hash function includes the length. */
117 bool _hash_includes_len;
118
119 /* User-specified or computed key positions. */
120 Positions _key_positions;
121
122 /* Adjustments to add to bytes add specific key positions. */
123 unsigned int * _alpha_inc;
124
125 /* Size of alphabet. */
126 unsigned int _alpha_size;
127
128 /* Alphabet character unification, either the identity or a mapping from
129 upper case characters to lower case characters (and maybe more). */
130 unsigned int * _alpha_unify;
131
132 /* Maximum _selchars_length over all keywords. */
133 unsigned int _max_selchars_length;
134
135 /* Total number of duplicates that have been moved to _duplicate_link lists
136 (not counting their representatives which stay on the main list). */
137 int _total_duplicates;
138
139 /* Counts occurrences of each key set character.
140 _occurrences[c] is the number of times that c occurs among the _selchars
141 of a keyword. */
142 int * _occurrences;
143 /* Value associated with each character. */
144 int * _asso_values;
145
146private:
147
148 /* Length of _head list. Number of keywords, not counting duplicates. */
149 int _list_len;
150
151 /* Exclusive upper bound for every _asso_values[c]. A power of 2. */
152 unsigned int _asso_value_max;
153
154 /* Initial value for asso_values table. -1 means random. */
155 int _initial_asso_value;
156 /* Jump length when trying alternative values. 0 means random. */
157 int _jump;
158
159 /* Maximal possible hash value. */
160 int _max_hash_value;
161
162 /* Sparse bit vector for collision detection. */
163 Bool_Array * _collision_detector;
164};
165
166#endif
167