1 | /* $Id$Revision: */ |
2 | /* vim:set shiftwidth=4 ts=8: */ |
3 | |
4 | /************************************************************************* |
5 | * Copyright (c) 2011 AT&T Intellectual Property |
6 | * All rights reserved. This program and the accompanying materials |
7 | * are made available under the terms of the Eclipse Public License v1.0 |
8 | * which accompanies this distribution, and is available at |
9 | * http://www.eclipse.org/legal/epl-v10.html |
10 | * |
11 | * Contributors: See CVS logs. Details at http://www.graphviz.org/ |
12 | *************************************************************************/ |
13 | |
14 | /* Modularity Quality definitation: |
15 | |
16 | We assume undirected graph. Directed graph should be converted by summing edge weights. |
17 | |
18 | Given a partition P of V into k clusters. |
19 | |
20 | Let E(i,j) be the set of edges between cluster i and j. |
21 | Let |E(i,j)| be the sum of edge weights of edges in E(i,j). |
22 | |
23 | Let E(i,i) be the set of edges within cluster i, but excluding self-edges. |
24 | Let |E(i,i)| be the sum of edge weights of edges in E(i,i). |
25 | |
26 | Let V(i) be the sets of vertices in i |
27 | |
28 | The intra-cluster edges concentration for a cluster i is |
29 | (the denominator could be |V(i)|*(|V(i)-1)/2 strictly speaking as we exclude self-edges): |
30 | |
31 | |E(i,i)| |
32 | ----------- |
33 | (|V(i)|^2/2) |
34 | |
35 | The inter-cluster edges concentration between cluster i and j is |
36 | |
37 | |E(i,j)| |
38 | ------------ |
39 | |V(i)|*|V(j)| |
40 | |
41 | So the cluster index is defined as the average intra cluster edge concentration, minus |
42 | the inter-cluster edge concentration: |
43 | |
44 | . |E(i,i)| |E(i,j)| |
45 | MQ(P) = (1/k) * \sum_{i=1...k} ------------ - (1/(k*(k-1)/2)) * \sum_{i<j} ------------------- = mq_in/k - mq_out/(k*(k-1)/2) |
46 | . (|V(i)|^2/2) |V(i)|*|V(j)| |
47 | |
48 | or |
49 | |
50 | . |E(i,i)| |E(i,j)| |
51 | MQ(P)/2 = (1/k) * \sum_{i=1...k} ------------ - (1/(k*(k-1))) * \sum_{i<j} ------------------ = mq_in/k - mq_out/(k*(k-1)) |
52 | . |V(i)|^2 |V(i)|*|V(j)| |
53 | |
54 | Notice that if we assume the graph is unweights (edge weights = 1), then 0<= MQ <= 1. |
55 | For weighted graph, MQ may not be within 0 to 1. We could normalized it, but |
56 | for comparing clustering quality of the same graph but different partitioning, this |
57 | unnormalized quantity is not a problem. |
58 | |
59 | */ |
60 | |
61 | #define STANDALONE |
62 | #include "general.h" |
63 | #include "SparseMatrix.h" |
64 | #include "mq.h" |
65 | #include "LinkedList.h" |
66 | |
67 | static real get_mq(SparseMatrix A, int *assignment, int *ncluster0, real *mq_in0, real *mq_out0, real **dout0){ |
68 | /* given a symmetric matrix representation of a graph and an assignment of nodes into clusters, calculate the modularity quality. |
69 | assignment: assignmenet[i] gives the cluster assignment of node i. 0 <= assignment[i] < ncluster. |
70 | ncluster: number of clusters |
71 | mq_in: the part of MQ to do with intra-cluster edges, before divide by 1/k |
72 | mq_out: the part of MQ to do with inter-cluster edges, before divide by 1/(k*(k-1)) |
73 | mq = 2*(mq_in/k - mq_out/(k*(k-1))); |
74 | */ |
75 | int ncluster = 0; |
76 | int n = A->m; |
77 | int test_pattern_symmetry_only = FALSE; |
78 | int *counts, *ia = A->ia, *ja = A->ja, k, i, j, jj; |
79 | real mq_in = 0, mq_out = 0, *a = NULL, Vi, Vj; |
80 | int c; |
81 | real *dout; |
82 | |
83 | |
84 | assert(SparseMatrix_is_symmetric(A, test_pattern_symmetry_only)); |
85 | assert(A->n == n); |
86 | if (A->type == MATRIX_TYPE_REAL) a = (real*) A->a; |
87 | |
88 | counts = MALLOC(sizeof(int)*n); |
89 | |
90 | for (i = 0; i < n; i++) counts[i] = 0; |
91 | |
92 | for (i = 0; i < n; i++){ |
93 | assert(assignment[i] >= 0 && assignment[i] < n); |
94 | if (counts[assignment[i]] == 0) ncluster++; |
95 | counts[assignment[i]]++; |
96 | } |
97 | k = ncluster; |
98 | assert(ncluster <= n); |
99 | |
100 | for (i = 0; i < n; i++){ |
101 | assert(assignment[i] < ncluster); |
102 | c = assignment[i]; |
103 | Vi = counts[c]; |
104 | for (j = ia[i] ; j < ia[i+1]; j++){ |
105 | /* ASSUME UNDIRECTED */ |
106 | jj = ja[j]; |
107 | if (jj >= i) continue; |
108 | assert(assignment[jj] < ncluster); |
109 | Vj = counts[assignment[jj]]; |
110 | if (assignment[jj] == c){ |
111 | if (a) { |
112 | mq_in += a[j]/(Vi*Vi); |
113 | } else { |
114 | mq_in += 1./(Vi*Vi); |
115 | } |
116 | } else { |
117 | if (a) { |
118 | mq_out += a[j]/(Vi*Vj); |
119 | } else { |
120 | mq_out += 1./(Vi*Vj); |
121 | } |
122 | } |
123 | |
124 | } |
125 | } |
126 | |
127 | /* calculate scaled out degree */ |
128 | dout = MALLOC(sizeof(real)*n); |
129 | for (i = 0; i < n; i++){ |
130 | dout[i] = 0; |
131 | for (j = ia[i]; j < ia[i+1]; j++){ |
132 | jj = ja[j]; |
133 | if (jj == i) continue; |
134 | if (a){ |
135 | dout[i] += a[j]/(real) counts[assignment[jj]]; |
136 | } else { |
137 | dout[i] += 1./(real) counts[assignment[jj]]; |
138 | } |
139 | } |
140 | } |
141 | |
142 | *ncluster0 = k; |
143 | *mq_in0 = mq_in; |
144 | *mq_out0 = mq_out; |
145 | *dout0 = dout; |
146 | FREE(counts); |
147 | |
148 | if (k > 1){ |
149 | return 2*(mq_in/k - mq_out/(k*(k-1))); |
150 | } else { |
151 | return 2*mq_in; |
152 | } |
153 | } |
154 | |
155 | Multilevel_MQ_Clustering Multilevel_MQ_Clustering_init(SparseMatrix A, int level){ |
156 | Multilevel_MQ_Clustering grid; |
157 | int n = A->n, i; |
158 | int *matching; |
159 | |
160 | assert(A->type == MATRIX_TYPE_REAL); |
161 | assert(SparseMatrix_is_symmetric(A, FALSE)); |
162 | |
163 | if (!A) return NULL; |
164 | assert(A->m == n); |
165 | grid = MALLOC(sizeof(struct Multilevel_MQ_Clustering_struct)); |
166 | grid->level = level; |
167 | grid->n = n; |
168 | grid->A = A; |
169 | grid->P = NULL; |
170 | grid->R = NULL; |
171 | grid->next = NULL; |
172 | grid->prev = NULL; |
173 | grid->delete_top_level_A = FALSE; |
174 | matching = grid->matching = MALLOC(sizeof(real)*(n)); |
175 | grid->deg_intra = NULL; |
176 | grid->dout = NULL; |
177 | grid->wgt = NULL; |
178 | |
179 | if (level == 0){ |
180 | real mq = 0, mq_in, mq_out; |
181 | int n = A->n, ncluster; |
182 | real *deg_intra, *wgt, *dout; |
183 | |
184 | grid->deg_intra = MALLOC(sizeof(real)*(n)); |
185 | deg_intra = grid->deg_intra; |
186 | |
187 | grid->wgt = MALLOC(sizeof(real)*n); |
188 | wgt = grid->wgt; |
189 | |
190 | for (i = 0; i < n; i++){ |
191 | deg_intra[i] = 0; |
192 | wgt[i] = 1.; |
193 | } |
194 | for (i = 0; i < n; i++) matching[i] = i; |
195 | mq = get_mq(A, matching, &ncluster, &mq_in, &mq_out, &dout); |
196 | fprintf(stderr,"ncluster = %d, mq = %f\n" , ncluster, mq); |
197 | grid->mq = mq; |
198 | grid->mq_in = mq_in; |
199 | grid->mq_out = mq_out; |
200 | grid->dout = dout; |
201 | grid->ncluster = ncluster; |
202 | |
203 | } |
204 | |
205 | |
206 | return grid; |
207 | } |
208 | |
209 | void Multilevel_MQ_Clustering_delete(Multilevel_MQ_Clustering grid){ |
210 | if (!grid) return; |
211 | if (grid->A){ |
212 | if (grid->level == 0) { |
213 | if (grid->delete_top_level_A) SparseMatrix_delete(grid->A); |
214 | } else { |
215 | SparseMatrix_delete(grid->A); |
216 | } |
217 | } |
218 | SparseMatrix_delete(grid->P); |
219 | SparseMatrix_delete(grid->R); |
220 | FREE(grid->matching); |
221 | FREE(grid->deg_intra); |
222 | FREE(grid->dout); |
223 | FREE(grid->wgt); |
224 | Multilevel_MQ_Clustering_delete(grid->next); |
225 | FREE(grid); |
226 | } |
227 | |
228 | Multilevel_MQ_Clustering Multilevel_MQ_Clustering_establish(Multilevel_MQ_Clustering grid, int maxcluster){ |
229 | int *matching = grid->matching; |
230 | SparseMatrix A = grid->A; |
231 | int n = grid->n, level = grid->level, nc = 0, nclusters = n; |
232 | real mq = 0, mq_in = 0, mq_out = 0, mq_new, mq_in_new, mq_out_new, mq_max = 0, mq_in_max = 0, mq_out_max = 0; |
233 | int *ia = A->ia, *ja = A->ja; |
234 | real *a, amax = 0; |
235 | real *deg_intra = grid->deg_intra, *wgt = grid->wgt; |
236 | real *deg_intra_new, *wgt_new = NULL; |
237 | int i, j, k, jj, jc, jmax; |
238 | real *deg_inter, gain = 0, *dout = grid->dout, *dout_new, deg_in_i, deg_in_j, wgt_i, wgt_j, a_ij, dout_i, dout_j, dout_max = 0, wgt_jmax = 0; |
239 | int *mask; |
240 | real maxgain = 0; |
241 | real total_gain = 0; |
242 | SingleLinkedList *neighbors = NULL, lst; |
243 | |
244 | |
245 | neighbors = MALLOC(sizeof(SingleLinkedList)*n); |
246 | for (i = 0; i < n; i++) neighbors[i] = NULL; |
247 | |
248 | mq = grid->mq; |
249 | mq_in = grid->mq_in; |
250 | mq_out = grid->mq_out; |
251 | |
252 | deg_intra_new = MALLOC(sizeof(real)*n); |
253 | wgt_new = MALLOC(sizeof(real)*n); |
254 | deg_inter = MALLOC(sizeof(real)*n); |
255 | mask = MALLOC(sizeof(int)*n); |
256 | dout_new = MALLOC(sizeof(real)*n); |
257 | for (i = 0; i < n; i++) mask[i] = -1; |
258 | |
259 | assert(n == A->n); |
260 | for (i = 0; i < n; i++) matching[i] = UNMATCHED; |
261 | |
262 | /* gain in merging node A into cluster B is |
263 | mq_in_new = mq_in - |E(A,A)|/(V(A))^2 - |E(B,B)|/(V(B))^2 + (|E(A,A)|+|E(B,B)|+|E(A,B)|)/(|V(A)|+|V(B)|)^2 |
264 | . = mq_in - deg_intra(A)/|A|^2 - deg_intra(B)/|B|^2 + (deg_intra(A)+deg_intra(B)+a(A,B))/(|A|+|B|)^2 |
265 | |
266 | mq_out_new = mq_out - |E(A,B)|/(|V(A)|*V(B)|)-\sum_{C and A connected, C!=B} |E(A,C)|/(|V(A)|*|V(C)|)-\sum_{C and B connected,C!=B} |E(B,C)|/(|V(B)|*|V(C)|) |
267 | . + \sum_{C connected to A or B, C!=A, C!=B} (|E(A,C)|+|E(B,C)|)/(|V(C)|*(|V(A)|+|V(B)|) |
268 | . = mq_out + a(A,B)/(|A|*|B|)-\sum_{C and A connected} a(A,C)/(|A|*|C|)-\sum_{C and B connected} a(B,C)/(|B|*|C|) |
269 | . + \sum_{C connected to A or B, C!=A, C!=B} (a(A,C)+a(B,C))/(|C|*(|A|+|B|)) |
270 | Denote: |
271 | dout(i) = \sum_{j -- i} a(i,j)/|j| |
272 | then |
273 | |
274 | mq_out_new = mq_out - |E(A,B)|/(|V(A)|*V(B)|)-\sum_{C and A connected, C!=B} |E(A,C)|/(|V(A)|*|V(C)|)-\sum_{C and B connected,C!=B} |E(B,C)|/(|V(B)|*|V(C)|) |
275 | . + \sum_{C connected to A or B, C!=A, C!=B} (|E(A,C)|+|E(B,C)|)/(|V(C)|*(|V(A)|+|V(B)|) |
276 | . = mq_out + a(A,B)/(|A|*|B|)-dout(A)/|A| - dout(B)/|B| |
277 | . + (dout(A)+dout(B))/(|A|+|B|) - (a(A,B)/|A|+a(A,B)/|B|)/(|A|+|B|) |
278 | . = mq_out -dout(A)/|A| - dout(B)/|B| + (dout(A)+dout(B))/(|A|+|B|) |
279 | after merging A and B into cluster AB, |
280 | dout(AB) = dout(A) + dout(B); |
281 | dout(C) := dout(C) - a(A,C)/|A| - a(B,C)/|B| + a(A,C)/(|A|+|B|) + a(B, C)/(|A|+|B|) |
282 | |
283 | mq_new = mq_in_new/(k-1) - mq_out_new/((k-1)*(k-2)) |
284 | gain = mq_new - mq |
285 | */ |
286 | a = (real*) A->a; |
287 | for (i = 0; i < n; i++){ |
288 | if (matching[i] != UNMATCHED) continue; |
289 | /* accumulate connections between i and clusters */ |
290 | for (j = ia[i]; j < ia[i+1]; j++){ |
291 | jj = ja[j]; |
292 | if (jj == i) continue; |
293 | if ((jc=matching[jj]) != UNMATCHED){ |
294 | if (mask[jc] != i) { |
295 | mask[jc] = i; |
296 | deg_inter[jc] = a[j]; |
297 | } else { |
298 | deg_inter[jc] += a[j]; |
299 | } |
300 | } |
301 | } |
302 | deg_in_i = deg_intra[i]; |
303 | wgt_i = wgt[i]; |
304 | dout_i = dout[i]; |
305 | |
306 | maxgain = 0; |
307 | jmax = -1; |
308 | for (j = ia[i]; j < ia[i+1]; j++){ |
309 | jj = ja[j]; |
310 | if (jj == i) continue; |
311 | jc = matching[jj]; |
312 | if (jc == UNMATCHED){ |
313 | a_ij = a[j]; |
314 | wgt_j = wgt[jj]; |
315 | deg_in_j = deg_intra[jj]; |
316 | dout_j = dout[jj]; |
317 | } else if (deg_inter[jc] < 0){ |
318 | continue; |
319 | } else { |
320 | a_ij = deg_inter[jc]; |
321 | wgt_j = wgt_new[jc]; |
322 | deg_inter[jc] = -1; /* so that we do not redo the calulation when we hit another neighbor in cluster jc */ |
323 | deg_in_j = deg_intra_new[jc]; |
324 | dout_j = dout_new[jc]; |
325 | } |
326 | |
327 | mq_in_new = mq_in - deg_in_i/pow(wgt_i, 2) - deg_in_j/pow(wgt_j,2) |
328 | + (deg_in_i + deg_in_j + a_ij)/pow(wgt_i + wgt_j,2); |
329 | |
330 | mq_out_new = mq_out - dout_i/wgt_i - dout_j/wgt_j + (dout_i + dout_j)/(wgt_i + wgt_j); |
331 | |
332 | if (nclusters > 2){ |
333 | mq_new = 2*(mq_in_new/(nclusters - 1) - mq_out_new/((nclusters - 1)*(nclusters - 2))); |
334 | } else { |
335 | mq_new = 2*mq_in_new/(nclusters - 1); |
336 | } |
337 | |
338 | #ifdef DEBUG |
339 | {int ncluster; |
340 | double mq2, mq_in2, mq_out2, *dout2; |
341 | int *matching2, nc2 = nc; |
342 | matching2 = MALLOC(sizeof(int)*A->m); |
343 | matching2 = MEMCPY(matching2, matching, sizeof(real)*A->m); |
344 | if (jc != UNMATCHED) { |
345 | matching2[i] = jc; |
346 | } else { |
347 | matching2[i] = nc2; |
348 | matching2[jj] = nc2; |
349 | nc2++; |
350 | } |
351 | for (k = 0; k < n; k++) if (matching2[k] == UNMATCHED) matching2[k] =nc2++; |
352 | mq2 = get_mq(A, matching2, &ncluster, &mq_in2, &mq_out2, &dout2); |
353 | fprintf(stderr," {dout_i, dout_j}={%f,%f}, {predicted, calculated}: mq = {%f, %f}, mq_in ={%f,%f}, mq_out = {%f,%f}\n" ,dout_i, dout_j, mq_new, mq2, mq_in_new, mq_in2, mq_out_new, mq_out2); |
354 | |
355 | mq_new = mq2; |
356 | |
357 | } |
358 | #endif |
359 | |
360 | gain = mq_new - mq; |
361 | if (Verbose) fprintf(stderr,"gain in merging node %d with node %d = %f-%f = %f\n" , i, jj, mq, mq_new, gain); |
362 | if (j == ia[i] || gain > maxgain){ |
363 | maxgain = gain; |
364 | jmax = jj; |
365 | amax = a_ij; |
366 | dout_max = dout_j; |
367 | wgt_jmax = wgt_j; |
368 | mq_max = mq_new; |
369 | mq_in_max = mq_in_new; |
370 | mq_out_max = mq_out_new; |
371 | } |
372 | |
373 | } |
374 | |
375 | /* now merge i and jmax */ |
376 | if (maxgain > 0 || (nc >= 1 && nc > maxcluster)){ |
377 | total_gain += maxgain; |
378 | jc = matching[jmax]; |
379 | if (jc == UNMATCHED){ |
380 | fprintf(stderr, "maxgain=%f, merge %d, %d\n" ,maxgain, i, jmax); |
381 | neighbors[nc] = SingleLinkedList_new_int(jmax); |
382 | neighbors[nc] = SingleLinkedList_prepend_int(neighbors[nc], i); |
383 | dout_new[nc] = dout_i + dout_max; |
384 | matching[i] = matching[jmax] = nc; |
385 | wgt_new[nc] = wgt[i] + wgt[jmax]; |
386 | deg_intra_new[nc] = deg_intra[i] + deg_intra[jmax] + amax; |
387 | nc++; |
388 | } else { |
389 | fprintf(stderr,"maxgain=%f, merge with existing cluster %d, %d\n" ,maxgain, i, jc); |
390 | neighbors[jc] = SingleLinkedList_prepend_int(neighbors[jc], i); |
391 | dout_new[jc] = dout_i + dout_max; |
392 | wgt_new[jc] += wgt[i]; |
393 | matching[i] = jc; |
394 | deg_intra_new[jc] += deg_intra[i] + amax; |
395 | } |
396 | mq = mq_max; |
397 | mq_in = mq_in_max; |
398 | mq_out = mq_out_max; |
399 | nclusters--; |
400 | } else { |
401 | fprintf(stderr,"gain: %f -- no gain, skip merging node %d\n" , maxgain, i); |
402 | assert(maxgain <= 0); |
403 | neighbors[nc] = SingleLinkedList_new_int(i); |
404 | matching[i] = nc; |
405 | deg_intra_new[nc] = deg_intra[i]; |
406 | wgt_new[nc] = wgt[i]; |
407 | nc++; |
408 | } |
409 | |
410 | |
411 | /* update scaled outdegree of neighbors of i and its merged node/cluster jmax */ |
412 | jc = matching[i]; |
413 | lst = neighbors[jc]; |
414 | do { |
415 | mask[*((int*) SingleLinkedList_get_data(lst))] = n+i; |
416 | lst = SingleLinkedList_get_next(lst); |
417 | } while (lst); |
418 | |
419 | lst = neighbors[jc]; |
420 | |
421 | do { |
422 | k = *((int*) SingleLinkedList_get_data(lst)); |
423 | for (j = ia[k]; j < ia[k+1]; j++){ |
424 | jj = ja[j]; |
425 | if (mask[jj] == n+i) continue;/* link to within cluster */ |
426 | if ((jc = matching[jj]) == UNMATCHED){ |
427 | if (k == i){ |
428 | dout[jj] += -a[j]/wgt_i + a[j]/(wgt_i + wgt_jmax); |
429 | } else { |
430 | dout[jj] += -a[j]/wgt_jmax + a[j]/(wgt_i + wgt_jmax); |
431 | } |
432 | } else { |
433 | if (k == i){ |
434 | dout_new[jc] += -a[j]/wgt_i + a[j]/(wgt_i + wgt_jmax); |
435 | } else { |
436 | dout_new[jc] += -a[j]/wgt_jmax + a[j]/(wgt_i + wgt_jmax); |
437 | } |
438 | } |
439 | } |
440 | lst = SingleLinkedList_get_next(lst); |
441 | } while (lst); |
442 | |
443 | } |
444 | |
445 | fprintf(stderr,"verbose=%d\n" ,Verbose); |
446 | if (Verbose) fprintf(stderr,"mq = %f new mq = %f level = %d, n = %d, nc = %d, gain = %g, mq_in = %f, mq_out = %f\n" , mq, mq + total_gain, |
447 | level, n, nc, total_gain, mq_in, mq_out); |
448 | |
449 | #ifdef DEBUG |
450 | {int ncluster; |
451 | |
452 | mq = get_mq(A, matching, &ncluster, &mq_in, &mq_out, &dout); |
453 | fprintf(stderr," mq = %f\n" ,mq); |
454 | |
455 | } |
456 | #endif |
457 | |
458 | if (nc >= 1 && (total_gain > 0 || nc < n)){ |
459 | /* now set up restriction and prolongation operator */ |
460 | SparseMatrix P, R, R0, B, cA; |
461 | real one = 1.; |
462 | Multilevel_MQ_Clustering cgrid; |
463 | |
464 | R0 = SparseMatrix_new(nc, n, 1, MATRIX_TYPE_REAL, FORMAT_COORD); |
465 | for (i = 0; i < n; i++){ |
466 | jj = matching[i]; |
467 | SparseMatrix_coordinate_form_add_entries(R0, 1, &jj, &i, &one); |
468 | } |
469 | R = SparseMatrix_from_coordinate_format(R0); |
470 | SparseMatrix_delete(R0); |
471 | P = SparseMatrix_transpose(R); |
472 | B = SparseMatrix_multiply(R, A); |
473 | if (!B) goto RETURN; |
474 | cA = SparseMatrix_multiply(B, P); |
475 | if (!cA) goto RETURN; |
476 | SparseMatrix_delete(B); |
477 | grid->P = P; |
478 | grid->R = R; |
479 | level++; |
480 | cgrid = Multilevel_MQ_Clustering_init(cA, level); |
481 | deg_intra_new = REALLOC(deg_intra_new, nc*sizeof(real)); |
482 | wgt_new = REALLOC(wgt_new, nc*sizeof(real)); |
483 | cgrid->deg_intra = deg_intra_new; |
484 | cgrid->mq = grid->mq + total_gain; |
485 | cgrid->wgt = wgt_new; |
486 | dout_new = REALLOC(dout_new, nc*sizeof(real)); |
487 | cgrid->dout = dout_new; |
488 | |
489 | cgrid = Multilevel_MQ_Clustering_establish(cgrid, maxcluster); |
490 | |
491 | grid->next = cgrid; |
492 | cgrid->prev = grid; |
493 | } else { |
494 | /* no more improvement, stop and final clustering found */ |
495 | for (i = 0; i < n; i++) matching[i] = i; |
496 | |
497 | FREE(deg_intra_new); |
498 | FREE(wgt_new); |
499 | FREE(dout_new); |
500 | } |
501 | |
502 | RETURN: |
503 | for (i = 0; i < n; i++) SingleLinkedList_delete(neighbors[i], free); |
504 | FREE(neighbors); |
505 | |
506 | FREE(deg_inter); |
507 | FREE(mask); |
508 | return grid; |
509 | } |
510 | |
511 | Multilevel_MQ_Clustering Multilevel_MQ_Clustering_new(SparseMatrix A0, int maxcluster){ |
512 | /* maxcluster is used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters |
513 | is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0 */ |
514 | Multilevel_MQ_Clustering grid; |
515 | SparseMatrix A = A0; |
516 | |
517 | if (maxcluster <= 0) maxcluster = A->m; |
518 | if (!SparseMatrix_is_symmetric(A, FALSE) || A->type != MATRIX_TYPE_REAL){ |
519 | A = SparseMatrix_get_real_adjacency_matrix_symmetrized(A); |
520 | } |
521 | grid = Multilevel_MQ_Clustering_init(A, 0); |
522 | |
523 | grid = Multilevel_MQ_Clustering_establish(grid, maxcluster); |
524 | |
525 | if (A != A0) grid->delete_top_level_A = TRUE;/* be sure to clean up later */ |
526 | return grid; |
527 | } |
528 | |
529 | |
530 | static void hierachical_mq_clustering(SparseMatrix A, int maxcluster, |
531 | int *nclusters, int **assignment, real *mq, int *flag){ |
532 | /* find a clustering of vertices by maximize mq |
533 | A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1. |
534 | maxcluster: used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters |
535 | . is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0 |
536 | nclusters: on output the number of clusters |
537 | assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters |
538 | */ |
539 | |
540 | Multilevel_MQ_Clustering grid, cgrid; |
541 | int *matching, i; |
542 | SparseMatrix P; |
543 | real *u; |
544 | assert(A->m == A->n); |
545 | |
546 | *mq = 0.; |
547 | |
548 | *flag = 0; |
549 | |
550 | grid = Multilevel_MQ_Clustering_new(A, maxcluster); |
551 | |
552 | /* find coarsest */ |
553 | cgrid = grid; |
554 | while (cgrid->next){ |
555 | cgrid = cgrid->next; |
556 | } |
557 | |
558 | /* project clustering up */ |
559 | u = MALLOC(sizeof(real)*cgrid->n); |
560 | for (i = 0; i < cgrid->n; i++) u[i] = (real) (cgrid->matching)[i]; |
561 | *nclusters = cgrid->n; |
562 | *mq = cgrid->mq; |
563 | |
564 | while (cgrid->prev){ |
565 | real *v = NULL; |
566 | P = cgrid->prev->P; |
567 | SparseMatrix_multiply_vector(P, u, &v, FALSE); |
568 | FREE(u); |
569 | u = v; |
570 | cgrid = cgrid->prev; |
571 | } |
572 | |
573 | if (*assignment){ |
574 | matching = *assignment; |
575 | } else { |
576 | matching = MALLOC(sizeof(int)*(grid->n)); |
577 | *assignment = matching; |
578 | } |
579 | for (i = 0; i < grid->n; i++) (matching)[i] = (int) u[i]; |
580 | FREE(u); |
581 | |
582 | Multilevel_MQ_Clustering_delete(grid); |
583 | |
584 | } |
585 | |
586 | |
587 | |
588 | void mq_clustering(SparseMatrix A, int inplace, int maxcluster, int use_value, |
589 | int *nclusters, int **assignment, real *mq, int *flag){ |
590 | /* find a clustering of vertices by maximize mq |
591 | A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1. |
592 | inplace: whether A can e modified. If true, A will be modified by removing diagonal. |
593 | maxcluster: used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters |
594 | . is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0 |
595 | nclusters: on output the number of clusters |
596 | assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters |
597 | */ |
598 | SparseMatrix B; |
599 | |
600 | *flag = 0; |
601 | |
602 | assert(A->m == A->n); |
603 | |
604 | B = SparseMatrix_symmetrize(A, FALSE); |
605 | |
606 | if (!inplace && B == A) { |
607 | B = SparseMatrix_copy(A); |
608 | } |
609 | |
610 | B = SparseMatrix_remove_diagonal(B); |
611 | |
612 | if (B->type != MATRIX_TYPE_REAL || !use_value) B = SparseMatrix_set_entries_to_real_one(B); |
613 | |
614 | hierachical_mq_clustering(B, maxcluster, nclusters, assignment, mq, flag); |
615 | |
616 | if (B != A) SparseMatrix_delete(B); |
617 | |
618 | } |
619 | |