1 | /*------------------------------------------------------------------------ |
2 | * |
3 | * geqo_main.c |
4 | * solution to the query optimization problem |
5 | * by means of a Genetic Algorithm (GA) |
6 | * |
7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
8 | * Portions Copyright (c) 1994, Regents of the University of California |
9 | * |
10 | * src/backend/optimizer/geqo/geqo_main.c |
11 | * |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | |
15 | /* contributed by: |
16 | =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= |
17 | * Martin Utesch * Institute of Automatic Control * |
18 | = = University of Mining and Technology = |
19 | * utesch@aut.tu-freiberg.de * Freiberg, Germany * |
20 | =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= |
21 | */ |
22 | |
23 | /* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */ |
24 | |
25 | #include "postgres.h" |
26 | |
27 | #include <math.h> |
28 | |
29 | #include "optimizer/geqo_misc.h" |
30 | #include "optimizer/geqo_mutation.h" |
31 | #include "optimizer/geqo_pool.h" |
32 | #include "optimizer/geqo_random.h" |
33 | #include "optimizer/geqo_selection.h" |
34 | |
35 | |
36 | /* |
37 | * Configuration options |
38 | */ |
39 | int Geqo_effort; |
40 | int Geqo_pool_size; |
41 | int Geqo_generations; |
42 | double Geqo_selection_bias; |
43 | double Geqo_seed; |
44 | |
45 | |
46 | static int gimme_pool_size(int nr_rel); |
47 | static int gimme_number_generations(int pool_size); |
48 | |
49 | /* complain if no recombination mechanism is #define'd */ |
50 | #if !defined(ERX) && \ |
51 | !defined(PMX) && \ |
52 | !defined(CX) && \ |
53 | !defined(PX) && \ |
54 | !defined(OX1) && \ |
55 | !defined(OX2) |
56 | #error "must choose one GEQO recombination mechanism in geqo.h" |
57 | #endif |
58 | |
59 | |
60 | /* |
61 | * geqo |
62 | * solution of the query optimization problem |
63 | * similar to a constrained Traveling Salesman Problem (TSP) |
64 | */ |
65 | |
66 | RelOptInfo * |
67 | geqo(PlannerInfo *root, int number_of_rels, List *initial_rels) |
68 | { |
69 | GeqoPrivateData private; |
70 | int generation; |
71 | Chromosome *momma; |
72 | Chromosome *daddy; |
73 | Chromosome *kid; |
74 | Pool *pool; |
75 | int pool_size, |
76 | number_generations; |
77 | |
78 | #ifdef GEQO_DEBUG |
79 | int status_interval; |
80 | #endif |
81 | Gene *best_tour; |
82 | RelOptInfo *best_rel; |
83 | |
84 | #if defined(ERX) |
85 | Edge *edge_table; /* list of edges */ |
86 | int edge_failures = 0; |
87 | #endif |
88 | #if defined(CX) || defined(PX) || defined(OX1) || defined(OX2) |
89 | City *city_table; /* list of cities */ |
90 | #endif |
91 | #if defined(CX) |
92 | int cycle_diffs = 0; |
93 | int mutations = 0; |
94 | #endif |
95 | |
96 | /* set up private information */ |
97 | root->join_search_private = (void *) &private; |
98 | private.initial_rels = initial_rels; |
99 | |
100 | /* initialize private number generator */ |
101 | geqo_set_seed(root, Geqo_seed); |
102 | |
103 | /* set GA parameters */ |
104 | pool_size = gimme_pool_size(number_of_rels); |
105 | number_generations = gimme_number_generations(pool_size); |
106 | #ifdef GEQO_DEBUG |
107 | status_interval = 10; |
108 | #endif |
109 | |
110 | /* allocate genetic pool memory */ |
111 | pool = alloc_pool(root, pool_size, number_of_rels); |
112 | |
113 | /* random initialization of the pool */ |
114 | random_init_pool(root, pool); |
115 | |
116 | /* sort the pool according to cheapest path as fitness */ |
117 | sort_pool(root, pool); /* we have to do it only one time, since all |
118 | * kids replace the worst individuals in |
119 | * future (-> geqo_pool.c:spread_chromo ) */ |
120 | |
121 | #ifdef GEQO_DEBUG |
122 | elog(DEBUG1, "GEQO selected %d pool entries, best %.2f, worst %.2f" , |
123 | pool_size, |
124 | pool->data[0].worth, |
125 | pool->data[pool_size - 1].worth); |
126 | #endif |
127 | |
128 | /* allocate chromosome momma and daddy memory */ |
129 | momma = alloc_chromo(root, pool->string_length); |
130 | daddy = alloc_chromo(root, pool->string_length); |
131 | |
132 | #if defined (ERX) |
133 | #ifdef GEQO_DEBUG |
134 | elog(DEBUG2, "using edge recombination crossover [ERX]" ); |
135 | #endif |
136 | /* allocate edge table memory */ |
137 | edge_table = alloc_edge_table(root, pool->string_length); |
138 | #elif defined(PMX) |
139 | #ifdef GEQO_DEBUG |
140 | elog(DEBUG2, "using partially matched crossover [PMX]" ); |
141 | #endif |
142 | /* allocate chromosome kid memory */ |
143 | kid = alloc_chromo(root, pool->string_length); |
144 | #elif defined(CX) |
145 | #ifdef GEQO_DEBUG |
146 | elog(DEBUG2, "using cycle crossover [CX]" ); |
147 | #endif |
148 | /* allocate city table memory */ |
149 | kid = alloc_chromo(root, pool->string_length); |
150 | city_table = alloc_city_table(root, pool->string_length); |
151 | #elif defined(PX) |
152 | #ifdef GEQO_DEBUG |
153 | elog(DEBUG2, "using position crossover [PX]" ); |
154 | #endif |
155 | /* allocate city table memory */ |
156 | kid = alloc_chromo(root, pool->string_length); |
157 | city_table = alloc_city_table(root, pool->string_length); |
158 | #elif defined(OX1) |
159 | #ifdef GEQO_DEBUG |
160 | elog(DEBUG2, "using order crossover [OX1]" ); |
161 | #endif |
162 | /* allocate city table memory */ |
163 | kid = alloc_chromo(root, pool->string_length); |
164 | city_table = alloc_city_table(root, pool->string_length); |
165 | #elif defined(OX2) |
166 | #ifdef GEQO_DEBUG |
167 | elog(DEBUG2, "using order crossover [OX2]" ); |
168 | #endif |
169 | /* allocate city table memory */ |
170 | kid = alloc_chromo(root, pool->string_length); |
171 | city_table = alloc_city_table(root, pool->string_length); |
172 | #endif |
173 | |
174 | |
175 | /* my pain main part: */ |
176 | /* iterative optimization */ |
177 | |
178 | for (generation = 0; generation < number_generations; generation++) |
179 | { |
180 | /* SELECTION: using linear bias function */ |
181 | geqo_selection(root, momma, daddy, pool, Geqo_selection_bias); |
182 | |
183 | #if defined (ERX) |
184 | /* EDGE RECOMBINATION CROSSOVER */ |
185 | gimme_edge_table(root, momma->string, daddy->string, pool->string_length, edge_table); |
186 | |
187 | kid = momma; |
188 | |
189 | /* are there any edge failures ? */ |
190 | edge_failures += gimme_tour(root, edge_table, kid->string, pool->string_length); |
191 | #elif defined(PMX) |
192 | /* PARTIALLY MATCHED CROSSOVER */ |
193 | pmx(root, momma->string, daddy->string, kid->string, pool->string_length); |
194 | #elif defined(CX) |
195 | /* CYCLE CROSSOVER */ |
196 | cycle_diffs = cx(root, momma->string, daddy->string, kid->string, pool->string_length, city_table); |
197 | /* mutate the child */ |
198 | if (cycle_diffs == 0) |
199 | { |
200 | mutations++; |
201 | geqo_mutation(root, kid->string, pool->string_length); |
202 | } |
203 | #elif defined(PX) |
204 | /* POSITION CROSSOVER */ |
205 | px(root, momma->string, daddy->string, kid->string, pool->string_length, city_table); |
206 | #elif defined(OX1) |
207 | /* ORDER CROSSOVER */ |
208 | ox1(root, momma->string, daddy->string, kid->string, pool->string_length, city_table); |
209 | #elif defined(OX2) |
210 | /* ORDER CROSSOVER */ |
211 | ox2(root, momma->string, daddy->string, kid->string, pool->string_length, city_table); |
212 | #endif |
213 | |
214 | |
215 | /* EVALUATE FITNESS */ |
216 | kid->worth = geqo_eval(root, kid->string, pool->string_length); |
217 | |
218 | /* push the kid into the wilderness of life according to its worth */ |
219 | spread_chromo(root, kid, pool); |
220 | |
221 | |
222 | #ifdef GEQO_DEBUG |
223 | if (status_interval && !(generation % status_interval)) |
224 | print_gen(stdout, pool, generation); |
225 | #endif |
226 | |
227 | } |
228 | |
229 | |
230 | #if defined(ERX) && defined(GEQO_DEBUG) |
231 | if (edge_failures != 0) |
232 | elog(LOG, "[GEQO] failures: %d, average: %d" , |
233 | edge_failures, (int) number_generations / edge_failures); |
234 | else |
235 | elog(LOG, "[GEQO] no edge failures detected" ); |
236 | #endif |
237 | |
238 | #if defined(CX) && defined(GEQO_DEBUG) |
239 | if (mutations != 0) |
240 | elog(LOG, "[GEQO] mutations: %d, generations: %d" , |
241 | mutations, number_generations); |
242 | else |
243 | elog(LOG, "[GEQO] no mutations processed" ); |
244 | #endif |
245 | |
246 | #ifdef GEQO_DEBUG |
247 | print_pool(stdout, pool, 0, pool_size - 1); |
248 | #endif |
249 | |
250 | #ifdef GEQO_DEBUG |
251 | elog(DEBUG1, "GEQO best is %.2f after %d generations" , |
252 | pool->data[0].worth, number_generations); |
253 | #endif |
254 | |
255 | |
256 | /* |
257 | * got the cheapest query tree processed by geqo; first element of the |
258 | * population indicates the best query tree |
259 | */ |
260 | best_tour = (Gene *) pool->data[0].string; |
261 | |
262 | best_rel = gimme_tree(root, best_tour, pool->string_length); |
263 | |
264 | if (best_rel == NULL) |
265 | elog(ERROR, "geqo failed to make a valid plan" ); |
266 | |
267 | /* DBG: show the query plan */ |
268 | #ifdef NOT_USED |
269 | print_plan(best_plan, root); |
270 | #endif |
271 | |
272 | /* ... free memory stuff */ |
273 | free_chromo(root, momma); |
274 | free_chromo(root, daddy); |
275 | |
276 | #if defined (ERX) |
277 | free_edge_table(root, edge_table); |
278 | #elif defined(PMX) |
279 | free_chromo(root, kid); |
280 | #elif defined(CX) |
281 | free_chromo(root, kid); |
282 | free_city_table(root, city_table); |
283 | #elif defined(PX) |
284 | free_chromo(root, kid); |
285 | free_city_table(root, city_table); |
286 | #elif defined(OX1) |
287 | free_chromo(root, kid); |
288 | free_city_table(root, city_table); |
289 | #elif defined(OX2) |
290 | free_chromo(root, kid); |
291 | free_city_table(root, city_table); |
292 | #endif |
293 | |
294 | free_pool(root, pool); |
295 | |
296 | /* ... clear root pointer to our private storage */ |
297 | root->join_search_private = NULL; |
298 | |
299 | return best_rel; |
300 | } |
301 | |
302 | |
303 | /* |
304 | * Return either configured pool size or a good default |
305 | * |
306 | * The default is based on query size (no. of relations) = 2^(QS+1), |
307 | * but constrained to a range based on the effort value. |
308 | */ |
309 | static int |
310 | gimme_pool_size(int nr_rel) |
311 | { |
312 | double size; |
313 | int minsize; |
314 | int maxsize; |
315 | |
316 | /* Legal pool size *must* be at least 2, so ignore attempt to select 1 */ |
317 | if (Geqo_pool_size >= 2) |
318 | return Geqo_pool_size; |
319 | |
320 | size = pow(2.0, nr_rel + 1.0); |
321 | |
322 | maxsize = 50 * Geqo_effort; /* 50 to 500 individuals */ |
323 | if (size > maxsize) |
324 | return maxsize; |
325 | |
326 | minsize = 10 * Geqo_effort; /* 10 to 100 individuals */ |
327 | if (size < minsize) |
328 | return minsize; |
329 | |
330 | return (int) ceil(size); |
331 | } |
332 | |
333 | |
334 | /* |
335 | * Return either configured number of generations or a good default |
336 | * |
337 | * The default is the same as the pool size, which allows us to be |
338 | * sure that less-fit individuals get pushed out of the breeding |
339 | * population before the run finishes. |
340 | */ |
341 | static int |
342 | gimme_number_generations(int pool_size) |
343 | { |
344 | if (Geqo_generations > 0) |
345 | return Geqo_generations; |
346 | |
347 | return pool_size; |
348 | } |
349 | |