Actual source code: hem.c
2: #include <petsc/private/matimpl.h>
3: #include <../src/mat/impls/aij/seq/aij.h>
4: #include <../src/mat/impls/aij/mpi/mpiaij.h>
6: /* linked list methods
7: *
8: * PetscCDCreate
9: */
10: PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out)
11: {
12: PetscCoarsenData *ail;
14: /* allocate pool, partially */
15: PetscNew(&ail);
16: *a_out = ail;
17: ail->pool_list.next = NULL;
18: ail->pool_list.array = NULL;
19: ail->chk_sz = 0;
20: /* allocate array */
21: ail->size = a_size;
22: PetscCalloc1(a_size, &ail->array);
23: ail->extra_nodes = NULL;
24: ail->mat = NULL;
25: return 0;
26: }
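/* Added illustrative sketch (not part of PETSc; the helper name PetscCDExampleBuild is
   hypothetical): a minimal use of the list-of-lists API defined in this file -- create one
   linked list per local index, seed each list with its own id, and let the caller destroy the
   structure with PetscCDDestroy(). */
static inline PetscErrorCode PetscCDExampleBuild(PetscInt n, PetscCoarsenData **a_out)
{
  PetscCoarsenData *cd;
  PetscInt          kk;

  PetscCDCreate(n, &cd);       /* one (initially empty) list per local index */
  PetscCDSetChuckSize(cd, 64); /* list nodes are pool-allocated in blocks of this size */
  for (kk = 0; kk < n; kk++) PetscCDAppendID(cd, kk, kk); /* append id kk to list kk */
  *a_out = cd;                 /* caller is responsible for PetscCDDestroy(cd) */
  return 0;
}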
28: /* PetscCDDestroy
29: */
30: PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail)
31: {
32: PetscCDArrNd *n = &ail->pool_list;
34: n = n->next;
35: while (n) {
36: PetscCDArrNd *lstn = n;
37: n = n->next;
38: PetscFree(lstn);
39: }
40: if (ail->pool_list.array) PetscFree(ail->pool_list.array);
41: PetscFree(ail->array);
42: if (ail->mat) MatDestroy(&ail->mat);
43: /* delete this (+agg+pool array) */
44: PetscFree(ail);
45: return 0;
46: }
48: /* PetscCDSetChuckSize
49: */
50: PetscErrorCode PetscCDSetChuckSize(PetscCoarsenData *ail, PetscInt a_sz)
51: {
52: ail->chk_sz = a_sz;
53: return 0;
54: }
56: /* PetscCDGetNewNode
57: */
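/* Added note: nodes are never freed one at a time.  A request is served from the 'extra_nodes'
   free list (repopulated by PetscCDRemoveAll) when possible; otherwise a node is carved out of
   the current pool block, and a new block of 'chk_sz' nodes is allocated and chained into
   'pool_list' whenever the current block is exhausted. */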
58: PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id)
59: {
60: *a_out = NULL; /* squelch -Wmaybe-uninitialized */
61: if (ail->extra_nodes) {
62: PetscCDIntNd *node = ail->extra_nodes;
63: ail->extra_nodes = node->next;
64: node->gid = a_id;
65: node->next = NULL;
66: *a_out = node;
67: } else {
68: if (!ail->pool_list.array) {
69: if (!ail->chk_sz) ail->chk_sz = 10; /* use a chunk size of ail->size? */
70: PetscMalloc1(ail->chk_sz, &ail->pool_list.array);
71: ail->new_node = ail->pool_list.array;
72: ail->new_left = ail->chk_sz;
73: ail->new_node->next = NULL;
74: } else if (!ail->new_left) {
75: PetscCDArrNd *node;
76: PetscMalloc(ail->chk_sz * sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node);
77: node->array = (PetscCDIntNd *)(node + 1);
78: node->next = ail->pool_list.next;
79: ail->pool_list.next = node;
80: ail->new_left = ail->chk_sz;
81: ail->new_node = node->array;
82: }
83: ail->new_node->gid = a_id;
84: ail->new_node->next = NULL;
85: *a_out = ail->new_node++;
86: ail->new_left--;
87: }
88: return 0;
89: }
91: /* PetscCDIntNdSetID
92: */
93: PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id)
94: {
95: a_this->gid = a_id;
96: return 0;
97: }
99: /* PetscCDIntNdGetID
100: */
101: PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid)
102: {
103: *a_gid = a_this->gid;
104: return 0;
105: }
107: /* PetscCDGetHeadPos
108: */
109: PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos)
110: {
112: *pos = ail->array[a_idx];
113: return 0;
114: }
116: /* PetscCDGetNextPos
117: */
118: PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos)
119: {
121: *pos = (*pos)->next;
122: return 0;
123: }
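/* Added illustrative sketch (not part of PETSc; PetscCDExampleSumIDs is a hypothetical helper):
   the head/next position API above is the standard way to walk a list, exactly as done later in
   this file; here it just sums the stored global ids of list a_idx. */
static inline PetscErrorCode PetscCDExampleSumIDs(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sum)
{
  PetscCDIntNd *pos;
  PetscInt      gid;

  *a_sum = 0;
  PetscCDGetHeadPos(ail, a_idx, &pos);
  while (pos) {
    PetscCDIntNdGetID(pos, &gid);        /* read the payload (a global id) */
    *a_sum += gid;
    PetscCDGetNextPos(ail, a_idx, &pos); /* advance; pos becomes NULL at the end of the list */
  }
  return 0;
}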
125: /* PetscCDAppendID
126: */
127: PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id)
128: {
129: PetscCDIntNd *n, *n2;
131: PetscCDGetNewNode(ail, &n, a_id);
133: if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = n;
134: else {
135: do {
136: if (!n2->next) {
137: n2->next = n;
139: break;
140: }
141: n2 = n2->next;
142: } while (n2);
144: }
145: return 0;
146: }
148: /* PetscCDAppendNode
149: */
150: PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n)
151: {
152: PetscCDIntNd *n2;
155: if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = a_n;
156: else {
157: do {
158: if (!n2->next) {
159: n2->next = a_n;
160: a_n->next = NULL;
161: break;
162: }
163: n2 = n2->next;
164: } while (n2);
166: }
167: return 0;
168: }
170: /* PetscCDRemoveNextNode: remove a_last->next; this exposes the singly linked list structure to the API
171: */
172: PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last)
173: {
174: PetscCDIntNd *del;
178: del = a_last->next;
179: a_last->next = del->next;
180: /* del->next = NULL; -- it is still used by an iterator, so keep it intact -- fixing this properly needs a doubly linked list */
181: /* the removed node could be reused, but PetscCDAppendNode sometimes still uses it */
182: return 0;
183: }
185: /* PetscCDPrint
186: */
187: PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, MPI_Comm comm)
188: {
189: PetscCDIntNd *n;
190: PetscInt ii, kk;
191: PetscMPIInt rank;
193: MPI_Comm_rank(comm, &rank);
194: for (ii = 0; ii < ail->size; ii++) {
195: kk = 0;
196: n = ail->array[ii];
197: if (n) PetscPrintf(comm, "[%d]%s list %" PetscInt_FMT ":\n", rank, PETSC_FUNCTION_NAME, ii);
198: while (n) {
199: PetscPrintf(comm, "\t[%d] %" PetscInt_FMT ") id %" PetscInt_FMT "\n", rank, ++kk, n->gid);
200: n = n->next;
201: }
202: }
203: return 0;
204: }
206: /* PetscCDAppendRemove
207: */
208: PetscErrorCode PetscCDAppendRemove(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx)
209: {
210: PetscCDIntNd *n;
215: n = ail->array[a_destidx];
216: if (!n) ail->array[a_destidx] = ail->array[a_srcidx];
217: else {
218: do {
219: if (!n->next) {
220: n->next = ail->array[a_srcidx];
221: break;
222: }
223: n = n->next;
224: } while (1);
225: }
226: ail->array[a_srcidx] = NULL;
227: return 0;
228: }
230: /* PetscCDRemoveAll
231: */
232: PetscErrorCode PetscCDRemoveAll(PetscCoarsenData *ail, PetscInt a_idx)
233: {
234: PetscCDIntNd *rem, *n1;
237: rem = ail->array[a_idx];
238: ail->array[a_idx] = NULL;
239: if (!(n1 = ail->extra_nodes)) ail->extra_nodes = rem;
240: else {
241: while (n1->next) n1 = n1->next;
242: n1->next = rem;
243: }
244: return 0;
245: }
247: /* PetscCDSizeAt
248: */
249: PetscErrorCode PetscCDSizeAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz)
250: {
251: PetscCDIntNd *n1;
252: PetscInt sz = 0;
255: n1 = ail->array[a_idx];
256: while (n1) {
257: n1 = n1->next;
258: sz++;
259: }
260: *a_sz = sz;
261: return 0;
262: }
264: /* PetscCDEmptyAt
265: */
266: PetscErrorCode PetscCDEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e)
267: {
269: *a_e = (PetscBool)(ail->array[a_idx] == NULL);
270: return 0;
271: }
273: /* PetscCDGetMIS - used for C-F methods
274: */
275: PetscErrorCode PetscCDGetMIS(PetscCoarsenData *ail, IS *a_mis)
276: {
277: PetscCDIntNd *n;
278: PetscInt ii, kk;
279: PetscInt *permute;
281: for (ii = kk = 0; ii < ail->size; ii++) {
282: n = ail->array[ii];
283: if (n) kk++;
284: }
285: PetscMalloc1(kk, &permute);
286: for (ii = kk = 0; ii < ail->size; ii++) {
287: n = ail->array[ii];
288: if (n) permute[kk++] = ii;
289: }
290: ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis);
291: return 0;
292: }
294: /* PetscCDGetMat
295: */
296: PetscErrorCode PetscCDGetMat(PetscCoarsenData *ail, Mat *a_mat)
297: {
298: *a_mat = ail->mat;
299: ail->mat = NULL; // give up ownership to the caller
300: return 0;
301: }
303: /* PetscCDSetMat
304: */
305: PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat)
306: {
307: if (ail->mat) {
308: MatDestroy(&ail->mat); //should not happen
309: }
310: ail->mat = a_mat;
311: return 0;
312: }
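/* Added note: PetscCDSetMat takes over the caller's reference to a_mat (no new reference is
   taken), and PetscCDGetMat hands the matrix back and clears ail->mat, so whichever side ends up
   holding it is responsible for the final MatDestroy; PetscCDDestroy also destroys a matrix that
   was never retrieved. */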
314: /* PetscCDGetASMBlocks - get IS of aggregates for ASM smoothers
315: */
316: PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, Mat mat, PetscInt *a_sz, IS **a_local_is)
317: {
318: PetscCDIntNd *n;
319: PetscInt lsz, ii, kk, *idxs, jj, s, e, gid;
320: IS *is_loc, is_bcs;
322: for (ii = kk = 0; ii < ail->size; ii++) {
323: if (ail->array[ii]) kk++;
324: }
325: /* count BCs */
326: MatGetOwnershipRange(mat, &s, &e);
327: for (gid = s, lsz = 0; gid < e; gid++) {
328: MatGetRow(mat, gid, &jj, NULL, NULL);
329: if (jj < 2) lsz++;
330: MatRestoreRow(mat, gid, &jj, NULL, NULL);
331: }
332: if (lsz) {
333: PetscMalloc1(a_bs * lsz, &idxs);
334: for (gid = s, lsz = 0; gid < e; gid++) {
335: MatGetRow(mat, gid, &jj, NULL, NULL);
336: if (jj < 2) {
337: for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj;
338: }
339: MatRestoreRow(mat, gid, &jj, NULL, NULL);
340: }
341: ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_bcs);
342: *a_sz = kk + 1; /* out */
343: } else {
344: is_bcs = NULL;
345: *a_sz = kk; /* out */
346: }
347: PetscMalloc1(*a_sz, &is_loc);
349: for (ii = kk = 0; ii < ail->size; ii++) {
350: for (lsz = 0, n = ail->array[ii]; n; lsz++, n = n->next) /* void */
351: ;
352: if (lsz) {
353: PetscMalloc1(a_bs * lsz, &idxs);
354: for (lsz = 0, n = ail->array[ii]; n; n = n->next) {
355: PetscCDIntNdGetID(n, &gid);
356: for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj;
357: }
358: ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++]);
359: }
360: }
361: if (is_bcs) is_loc[kk++] = is_bcs;
363: *a_local_is = is_loc; /* out */
365: return 0;
366: }
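/* Added illustrative sketch (not part of PETSc; PetscCDExampleFreeASMBlocks is a hypothetical
   helper, and it assumes the caller keeps ownership of the IS array allocated above): after the
   blocks have been handed to an ASM-type smoother, each IS and the array itself must be freed. */
static inline PetscErrorCode PetscCDExampleFreeASMBlocks(PetscInt a_sz, IS *a_local_is)
{
  PetscInt kk;

  for (kk = 0; kk < a_sz; kk++) ISDestroy(&a_local_is[kk]); /* one IS per aggregate (+1 for BCs) */
  PetscFree(a_local_is);
  return 0;
}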
368: /* ********************************************************************** */
369: /* edge for priority queue */
370: typedef struct edge_tag {
371: PetscReal weight;
372: PetscInt lid0, gid1, cpid1;
373: } Edge;
375: static int gamg_hem_compare(const void *a, const void *b)
376: {
377: PetscReal va = ((Edge *)a)->weight, vb = ((Edge *)b)->weight;
378: return (va < vb) ? 1 : (va == vb) ? 0 : -1; /* 0 for equal */
379: }
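/* Added note: qsort with this comparator (see the call below) orders the edge array by
   decreasing weight, so the greedy matching pass visits the heaviest edges first. */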
381: /* -------------------------------------------------------------------------- */
382: /*
383: MatCoarsenApply_HEM_private - parallel heavy edge matching
385: Input Parameters:
386: . perm - permutation of the local vertices (traversal order for the greedy matching)
387: . a_Gmat - global graph matrix whose entries are the edge weights
389: Output Parameter:
390: . a_locals_llist - array of lists of local nodes rooted at each local node
391: */
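/* Added outline of the implementation below:
   repeat up to n_iter passes on a working copy cMat of the graph:
     1. record, per local vertex, the heaviest incident edge weight (locMaxEdge) and the largest
        owning rank among its ghost neighbors (locMaxPE), and scatter both to the ghosts;
     2. build the list of local and ghost edges and sort it by decreasing weight;
     3. greedy matching sub-iterations: accept an edge only if neither endpoint is already matched
        and no heavier (or equal-weight, higher-rank) competing edge exists; matched pairs are
        recorded in the projection matrix P and merged in the aggregate lists agg_llists;
     4. exchange data about ghost vertices deleted by remote matches (tag1/tag2 messages) and
        recompute the max-edge data, until no active edges remain;
     5. form the coarser graph cMat <- P^T cMat P and rescale it symmetrically by the inverse
        square root of its diagonal.
   On parallel runs a final helper matrix recording off-process aggregate members is attached to
   agg_llists (see the "fake" matrix at the end of this function). */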
392: static PetscErrorCode MatCoarsenApply_HEM_private(IS perm, Mat a_Gmat, PetscCoarsenData **a_locals_llist)
393: {
394: PetscBool isMPI;
395: MPI_Comm comm;
396: PetscInt sub_it, kk, n, ix, *idx, *ii, iter, Iend, my0;
397: PetscMPIInt rank, size;
398: const PetscInt nloc = a_Gmat->rmap->n, n_iter = 4; /* need to figure out how to stop this */
399: PetscInt *lid_cprowID, *lid_gid;
400: PetscBool *lid_matched;
401: Mat_SeqAIJ *matA, *matB = NULL;
402: Mat_MPIAIJ *mpimat = NULL;
403: PetscScalar one = 1.;
404: PetscCoarsenData *agg_llists = NULL, *deleted_list = NULL;
405: Mat cMat, tMat, P;
406: MatScalar *ap;
407: PetscMPIInt tag1, tag2;
409: PetscObjectGetComm((PetscObject)a_Gmat, &comm);
410: MPI_Comm_rank(comm, &rank);
411: MPI_Comm_size(comm, &size);
412: MatGetOwnershipRange(a_Gmat, &my0, &Iend);
413: PetscCommGetNewTag(comm, &tag1);
414: PetscCommGetNewTag(comm, &tag2);
416: PetscMalloc1(nloc, &lid_gid); /* explicit array needed */
417: PetscMalloc1(nloc, &lid_cprowID);
418: PetscMalloc1(nloc, &lid_matched);
420: PetscCDCreate(nloc, &agg_llists);
421: /* PetscCDSetChuckSize(agg_llists, nloc+1); */
422: *a_locals_llist = agg_llists;
423: PetscCDCreate(size, &deleted_list);
424: PetscCDSetChuckSize(deleted_list, 100);
425: /* setup 'lid_gid' for scatters and add self to all lists */
426: for (kk = 0; kk < nloc; kk++) {
427: lid_gid[kk] = kk + my0;
428: PetscCDAppendID(agg_llists, kk, my0 + kk);
429: }
431: /* make a copy of the graph; it gets destroyed in the iterations */
432: MatDuplicate(a_Gmat, MAT_COPY_VALUES, &cMat);
433: PetscObjectTypeCompare((PetscObject)a_Gmat, MATMPIAIJ, &isMPI);
434: iter = 0;
435: while (iter++ < n_iter) {
436: PetscScalar *cpcol_gid, *cpcol_max_ew, *cpcol_max_pe, *lid_max_ew;
437: PetscBool *cpcol_matched;
438: PetscMPIInt *cpcol_pe, proc;
439: Vec locMaxEdge, locMaxPE, ghostMaxEdge, ghostMaxPE;
440: PetscInt nEdges, n_nz_row, jj;
441: Edge *Edges;
442: PetscInt gid;
443: const PetscInt *perm_ix, n_sub_its = 120;
445: /* get submatrices of cMat */
446: if (isMPI) {
447: mpimat = (Mat_MPIAIJ *)cMat->data;
448: matA = (Mat_SeqAIJ *)mpimat->A->data;
449: matB = (Mat_SeqAIJ *)mpimat->B->data;
450: if (!matB->compressedrow.use) {
451: /* force construction of compressed row data structure since code below requires it */
452: MatCheckCompressedRow(mpimat->B, matB->nonzerorowcnt, &matB->compressedrow, matB->i, mpimat->B->rmap->n, -1.0);
453: }
454: } else {
455: matA = (Mat_SeqAIJ *)cMat->data;
456: }
458: /* set max edge on nodes */
459: MatCreateVecs(cMat, &locMaxEdge, NULL);
460: MatCreateVecs(cMat, &locMaxPE, NULL);
462: /* get 'cpcol_pe' & 'cpcol_gid' & init. 'cpcol_matched' using 'mpimat->lvec' */
463: if (mpimat) {
464: Vec vec;
465: PetscScalar vval;
467: MatCreateVecs(cMat, &vec, NULL);
468: /* cpcol_pe */
469: vval = (PetscScalar)(rank);
470: for (kk = 0, gid = my0; kk < nloc; kk++, gid++) { VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES); /* set with GID */ }
471: VecAssemblyBegin(vec);
472: VecAssemblyEnd(vec);
473: VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD);
474: VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD);
475: VecGetArray(mpimat->lvec, &cpcol_gid); /* get proc ID in 'cpcol_gid' */
476: VecGetLocalSize(mpimat->lvec, &n);
477: PetscMalloc1(n, &cpcol_pe);
478: for (kk = 0; kk < n; kk++) cpcol_pe[kk] = (PetscMPIInt)PetscRealPart(cpcol_gid[kk]);
479: VecRestoreArray(mpimat->lvec, &cpcol_gid);
481: /* cpcol_gid */
482: for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
483: vval = (PetscScalar)(gid);
484: VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES); /* set with GID */
485: }
486: VecAssemblyBegin(vec);
487: VecAssemblyEnd(vec);
488: VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD);
489: VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD);
490: VecDestroy(&vec);
491: VecGetArray(mpimat->lvec, &cpcol_gid); /* get proc ID in 'cpcol_gid' */
493: /* cpcol_matched */
494: VecGetLocalSize(mpimat->lvec, &n);
495: PetscMalloc1(n, &cpcol_matched);
496: for (kk = 0; kk < n; kk++) cpcol_matched[kk] = PETSC_FALSE;
497: }
499: /* need an inverse map - locals */
500: for (kk = 0; kk < nloc; kk++) lid_cprowID[kk] = -1;
501: /* set index into compressed row 'lid_cprowID' */
502: if (matB) {
503: for (ix = 0; ix < matB->compressedrow.nrows; ix++) lid_cprowID[matB->compressedrow.rindex[ix]] = ix;
504: }
506: /* compute 'locMaxEdge' & 'locMaxPE', and count the edges */
507: for (nEdges = 0, kk = 0, gid = my0; kk < nloc; kk++, gid++) {
508: PetscReal max_e = 0., tt;
509: PetscScalar vval;
510: PetscInt lid = kk;
511: PetscMPIInt max_pe = rank, pe;
513: ii = matA->i;
514: n = ii[lid + 1] - ii[lid];
515: idx = matA->j + ii[lid];
516: ap = matA->a + ii[lid];
517: for (jj = 0; jj < n; jj++) {
518: PetscInt lidj = idx[jj];
519: if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
520: if (lidj > lid) nEdges++;
521: }
522: if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
523: ii = matB->compressedrow.i;
524: n = ii[ix + 1] - ii[ix];
525: ap = matB->a + ii[ix];
526: idx = matB->j + ii[ix];
527: for (jj = 0; jj < n; jj++) {
528: if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt;
529: nEdges++;
530: if ((pe = cpcol_pe[idx[jj]]) > max_pe) max_pe = pe;
531: }
532: }
533: vval = max_e;
534: VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES);
536: vval = (PetscScalar)max_pe;
537: VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);
538: }
539: VecAssemblyBegin(locMaxEdge);
540: VecAssemblyEnd(locMaxEdge);
541: VecAssemblyBegin(locMaxPE);
542: VecAssemblyEnd(locMaxPE);
544: /* get 'cpcol_max_ew' & 'cpcol_max_pe' */
545: if (mpimat) {
546: VecDuplicate(mpimat->lvec, &ghostMaxEdge);
547: VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD);
548: VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD);
549: VecGetArray(ghostMaxEdge, &cpcol_max_ew);
551: VecDuplicate(mpimat->lvec, &ghostMaxPE);
552: VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD);
553: VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD);
554: VecGetArray(ghostMaxPE, &cpcol_max_pe);
555: }
557: /* setup sorted list of edges */
558: PetscMalloc1(nEdges, &Edges);
559: ISGetIndices(perm, &perm_ix);
560: for (nEdges = n_nz_row = kk = 0; kk < nloc; kk++) {
561: PetscInt nn, lid = perm_ix[kk];
562: ii = matA->i;
563: nn = n = ii[lid + 1] - ii[lid];
564: idx = matA->j + ii[lid];
565: ap = matA->a + ii[lid];
566: for (jj = 0; jj < n; jj++) {
567: PetscInt lidj = idx[jj];
568: if (lidj > lid) {
569: Edges[nEdges].lid0 = lid;
570: Edges[nEdges].gid1 = lidj + my0;
571: Edges[nEdges].cpid1 = -1;
572: Edges[nEdges].weight = PetscRealPart(ap[jj]);
573: nEdges++;
574: }
575: }
576: if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
577: ii = matB->compressedrow.i;
578: n = ii[ix + 1] - ii[ix];
579: ap = matB->a + ii[ix];
580: idx = matB->j + ii[ix];
581: nn += n;
582: for (jj = 0; jj < n; jj++) {
583: Edges[nEdges].lid0 = lid;
584: Edges[nEdges].gid1 = (PetscInt)PetscRealPart(cpcol_gid[idx[jj]]);
585: Edges[nEdges].cpid1 = idx[jj];
586: Edges[nEdges].weight = PetscRealPart(ap[jj]);
587: nEdges++;
588: }
589: }
590: if (nn > 1) n_nz_row++;
591: else if (iter == 1) {
592: /* should select this because it is technically in the MIS but let's not */
593: PetscCDRemoveAll(agg_llists, lid);
594: }
595: }
596: ISRestoreIndices(perm, &perm_ix);
598: qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);
600: /* projection matrix */
601: MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 1, NULL, 1, NULL, &P);
603: /* clear matched flags */
604: for (kk = 0; kk < nloc; kk++) lid_matched[kk] = PETSC_FALSE;
605: /* process - communicate - process */
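/* Added note: each sub-iteration greedily accepts the heaviest still-active edges; a boundary
   edge whose weight ties a neighbor's current maximum is deferred to the higher-rank owner (the
   cpcol_max_pe tests below), and matched ghosts are exchanged before the next sub-iteration. */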
606: for (sub_it = 0; sub_it < n_sub_its; sub_it++) {
607: PetscInt nactive_edges;
609: VecGetArray(locMaxEdge, &lid_max_ew);
610: for (kk = nactive_edges = 0; kk < nEdges; kk++) {
611: /* HEM */
612: const Edge *e = &Edges[kk];
613: const PetscInt lid0 = e->lid0, gid1 = e->gid1, cpid1 = e->cpid1, gid0 = lid0 + my0, lid1 = gid1 - my0;
614: PetscBool isOK = PETSC_TRUE;
616: /* skip if either (local) vertex is done already */
617: if (lid_matched[lid0] || (gid1 >= my0 && gid1 < Iend && lid_matched[gid1 - my0])) continue;
619: /* skip if ghost vertex is done */
620: if (cpid1 != -1 && cpcol_matched[cpid1]) continue;
622: nactive_edges++;
623: /* skip if I have a bigger edge someplace (lid_max_ew gets updated) */
624: if (PetscRealPart(lid_max_ew[lid0]) > e->weight + PETSC_SMALL) continue;
626: if (cpid1 == -1) {
627: if (PetscRealPart(lid_max_ew[lid1]) > e->weight + PETSC_SMALL) continue;
628: } else {
629: /* see if edge might get matched on other proc */
630: PetscReal g_max_e = PetscRealPart(cpcol_max_ew[cpid1]);
631: if (g_max_e > e->weight + PETSC_SMALL) continue;
632: else if (e->weight > g_max_e - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[cpid1]) > rank) {
633: /* a tie: max_e equals this edge's weight and a larger-rank process will deal with it */
634: continue;
635: }
636: }
638: /* check ghost for v0 */
639: if (isOK) {
640: PetscReal max_e, ew;
641: if ((ix = lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
642: ii = matB->compressedrow.i;
643: n = ii[ix + 1] - ii[ix];
644: ap = matB->a + ii[ix];
645: idx = matB->j + ii[ix];
646: for (jj = 0; jj < n && isOK; jj++) {
647: PetscInt lidj = idx[jj];
648: if (cpcol_matched[lidj]) continue;
649: ew = PetscRealPart(ap[jj]);
650: max_e = PetscRealPart(cpcol_max_ew[lidj]);
651: /* if ew ties the ghost's max_e and the local max, a larger-rank process will deal with it, so give up this edge */
652: if (ew > max_e - PETSC_SMALL && ew > PetscRealPart(lid_max_ew[lid0]) - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) isOK = PETSC_FALSE;
653: }
654: }
656: /* for v1 */
657: if (cpid1 == -1 && isOK) {
658: if ((ix = lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
659: ii = matB->compressedrow.i;
660: n = ii[ix + 1] - ii[ix];
661: ap = matB->a + ii[ix];
662: idx = matB->j + ii[ix];
663: for (jj = 0; jj < n && isOK; jj++) {
664: PetscInt lidj = idx[jj];
665: if (cpcol_matched[lidj]) continue;
666: ew = PetscRealPart(ap[jj]);
667: max_e = PetscRealPart(cpcol_max_ew[lidj]);
668: /* if ew ties the ghost's max_e and the local max, a larger-rank process will deal with it, so give up this edge */
669: if (ew > max_e - PETSC_SMALL && ew > PetscRealPart(lid_max_ew[lid1]) - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) isOK = PETSC_FALSE;
670: }
671: }
672: }
673: }
675: /* do it */
676: if (isOK) {
677: if (cpid1 == -1) {
678: lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
679: PetscCDAppendRemove(agg_llists, lid0, lid1);
680: } else if (sub_it != n_sub_its - 1) {
681: /* add gid1 to list of ghost deleted by me -- I need their children */
682: proc = cpcol_pe[cpid1];
684: cpcol_matched[cpid1] = PETSC_TRUE; /* messing with VecGetArray array -- needed??? */
686: PetscCDAppendID(deleted_list, proc, cpid1); /* cache to send messages */
687: PetscCDAppendID(deleted_list, proc, lid0);
688: } else continue;
690: lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
691: /* set projection */
692: MatSetValues(P, 1, &gid0, 1, &gid0, &one, INSERT_VALUES);
693: MatSetValues(P, 1, &gid1, 1, &gid0, &one, INSERT_VALUES);
694: } /* matched */
695: } /* edge loop */
697: /* deal with deleted ghost on first pass */
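/* Added note on the protocol below: for every ghost vertex this rank matched, a tag1 request
   [n, rank, n*[gid1, lid0]] goes to the ghost's owner; the owner marks gid1 as matched, answers
   on tag2 with [lid0, m, m*[gid]] listing the members of gid1's aggregate (which it then clears),
   and the requester appends those gids to lid0's aggregate list. */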
698: if (size > 1 && sub_it != n_sub_its - 1) {
699: #define REQ_BF_SIZE 100
700: PetscCDIntNd *pos;
701: PetscBool ise = PETSC_FALSE;
702: PetscInt nSend1, **sbuffs1, nSend2;
703: MPI_Request *sreqs2[REQ_BF_SIZE], *rreqs2[REQ_BF_SIZE];
704: MPI_Status status;
706: /* send request */
707: for (proc = 0, nSend1 = 0; proc < size; proc++) {
708: PetscCDEmptyAt(deleted_list, proc, &ise);
709: if (!ise) nSend1++;
710: }
711: PetscMalloc1(nSend1, &sbuffs1);
712: for (proc = 0, nSend1 = 0; proc < size; proc++) {
713: /* count ghosts */
714: PetscCDSizeAt(deleted_list, proc, &n);
715: if (n > 0) {
716: #define CHUNCK_SIZE 100
717: PetscInt *sbuff, *pt;
718: MPI_Request *request;
719: n /= 2;
720: PetscMalloc1(2 + 2 * n + n * CHUNCK_SIZE + 2, &sbuff);
721: /* save requests */
722: sbuffs1[nSend1] = sbuff;
723: request = (MPI_Request *)sbuff;
724: sbuff = pt = (PetscInt *)(request + 1);
725: *pt++ = n;
726: *pt++ = rank;
728: PetscCDGetHeadPos(deleted_list, proc, &pos);
729: while (pos) {
730: PetscInt lid0, cpid, gid;
731: PetscCDIntNdGetID(pos, &cpid);
732: gid = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
733: PetscCDGetNextPos(deleted_list, proc, &pos);
734: PetscCDIntNdGetID(pos, &lid0);
735: PetscCDGetNextPos(deleted_list, proc, &pos);
736: *pt++ = gid;
737: *pt++ = lid0;
738: }
739: /* send request tag1 [n, proc, n*[gid1,lid0] ] */
740: MPI_Isend(sbuff, 2 * n + 2, MPIU_INT, proc, tag1, comm, request);
741: /* post receive */
742: request = (MPI_Request *)pt;
743: rreqs2[nSend1] = request; /* cache recv request */
744: pt = (PetscInt *)(request + 1);
745: MPI_Irecv(pt, n * CHUNCK_SIZE, MPIU_INT, proc, tag2, comm, request);
746: /* clear list */
747: PetscCDRemoveAll(deleted_list, proc);
748: nSend1++;
749: }
750: }
751: /* receive requests, send response, clear lists */
752: kk = nactive_edges;
753: MPIU_Allreduce(&kk, &nactive_edges, 1, MPIU_INT, MPI_SUM, comm); /* makes nactive_edges global (not a precise synchronization) */
754: nSend2 = 0;
755: while (1) {
756: #define BF_SZ 10000
757: PetscMPIInt flag, count;
758: PetscInt rbuff[BF_SZ], *pt, *pt2, *pt3, count2, *sbuff, count3;
759: MPI_Request *request;
761: MPI_Iprobe(MPI_ANY_SOURCE, tag1, comm, &flag, &status);
762: if (!flag) break;
763: MPI_Get_count(&status, MPIU_INT, &count);
765: proc = status.MPI_SOURCE;
766: /* receive request tag1 [n, proc, n*[gid1,lid0] ] */
767: MPI_Recv(rbuff, count, MPIU_INT, proc, tag1, comm, &status);
768: /* count sends */
769: pt = rbuff;
770: count3 = count2 = 0;
771: n = *pt++;
772: kk = *pt++;
773: while (n--) {
774: PetscInt gid1 = *pt++, lid1 = gid1 - my0;
775: kk = *pt++;
777: lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
778: PetscCDSizeAt(agg_llists, lid1, &kk);
779: count2 += kk + 2;
780: count3++; /* number of verts requested (n) */
781: }
783: /* send tag2 *[lid0, n, n*[gid] ] */
784: PetscMalloc(count2 * sizeof(PetscInt) + sizeof(MPI_Request), &sbuff);
785: request = (MPI_Request *)sbuff;
786: sreqs2[nSend2++] = request; /* cache request */
788: pt2 = sbuff = (PetscInt *)(request + 1);
789: pt = rbuff;
790: n = *pt++;
791: kk = *pt++;
792: while (n--) {
793: /* read [n, proc, n*[gid1,lid0] */
794: PetscInt gid1 = *pt++, lid1 = gid1 - my0, lid0 = *pt++;
795: /* write [lid0, n, n*[gid] ] */
796: *pt2++ = lid0;
797: pt3 = pt2++; /* save pointer for later */
798: PetscCDGetHeadPos(agg_llists, lid1, &pos);
799: while (pos) {
800: PetscInt gid;
801: PetscCDIntNdGetID(pos, &gid);
802: PetscCDGetNextPos(agg_llists, lid1, &pos);
803: *pt2++ = gid;
804: }
805: *pt3 = (pt2 - pt3) - 1;
806: /* clear list */
807: PetscCDRemoveAll(agg_llists, lid1);
808: }
809: /* send requested data tag2 *[lid0, n, n*[gid1] ] */
810: MPI_Isend(sbuff, count2, MPIU_INT, proc, tag2, comm, request);
811: }
813: /* receive tag2 *[lid0, n, n*[gid] ] */
814: for (kk = 0; kk < nSend1; kk++) {
815: PetscMPIInt count;
816: MPI_Request *request;
817: PetscInt *pt, *pt2;
819: request = rreqs2[kk]; /* no need to free -- buffer is in 'sbuffs1' */
820: MPI_Wait(request, &status);
821: MPI_Get_count(&status, MPIU_INT, &count);
822: pt = pt2 = (PetscInt *)(request + 1);
823: while (pt - pt2 < count) {
824: PetscInt lid0 = *pt++, n = *pt++;
825: while (n--) {
826: PetscInt gid1 = *pt++;
827: PetscCDAppendID(agg_llists, lid0, gid1);
828: }
829: }
830: }
832: /* wait for tag1 isends */
833: while (nSend1--) {
834: MPI_Request *request;
835: request = (MPI_Request *)sbuffs1[nSend1];
836: MPI_Wait(request, &status);
837: PetscFree(request);
838: }
839: PetscFree(sbuffs1);
841: /* wait for tag2 isends */
842: while (nSend2--) {
843: MPI_Request *request = sreqs2[nSend2];
844: MPI_Wait(request, &status);
845: PetscFree(request);
846: }
848: VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);
849: VecRestoreArray(ghostMaxPE, &cpcol_max_pe);
851: /* get 'cpcol_matched' - use locMaxPE, ghostMaxEdge, cpcol_max_ew */
852: for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
853: PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
854: VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES); /* set with GID */
855: }
856: VecAssemblyBegin(locMaxPE);
857: VecAssemblyEnd(locMaxPE);
858: VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD);
859: VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD);
860: VecGetArray(ghostMaxEdge, &cpcol_max_ew);
861: VecGetLocalSize(mpimat->lvec, &n);
862: for (kk = 0; kk < n; kk++) cpcol_matched[kk] = (PetscBool)(PetscRealPart(cpcol_max_ew[kk]) != 0.0);
863: VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);
864: } /* size > 1 */
866: /* compute 'locMaxEdge' */
867: VecRestoreArray(locMaxEdge, &lid_max_ew);
868: for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
869: PetscReal max_e = 0., tt;
870: PetscScalar vval;
871: PetscInt lid = kk;
873: if (lid_matched[lid]) vval = 0.;
874: else {
875: ii = matA->i;
876: n = ii[lid + 1] - ii[lid];
877: idx = matA->j + ii[lid];
878: ap = matA->a + ii[lid];
879: for (jj = 0; jj < n; jj++) {
880: PetscInt lidj = idx[jj];
881: if (lid_matched[lidj]) continue; /* this is new - can change local max */
882: if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
883: }
884: if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
885: ii = matB->compressedrow.i;
886: n = ii[ix + 1] - ii[ix];
887: ap = matB->a + ii[ix];
888: idx = matB->j + ii[ix];
889: for (jj = 0; jj < n; jj++) {
890: PetscInt lidj = idx[jj];
891: if (cpcol_matched[lidj]) continue;
892: if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt;
893: }
894: }
895: }
896: vval = (PetscScalar)max_e;
897: VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES); /* set with GID */
898: }
899: VecAssemblyBegin(locMaxEdge);
900: VecAssemblyEnd(locMaxEdge);
902: if (size > 1 && sub_it != n_sub_its - 1) {
903: /* compute 'cpcol_max_ew' */
904: VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD);
905: VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD);
906: VecGetArray(ghostMaxEdge, &cpcol_max_ew);
907: VecGetArray(locMaxEdge, &lid_max_ew);
909: /* compute 'cpcol_max_pe' */
910: for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
911: PetscInt lid = kk;
912: PetscReal ew, v1_max_e, v0_max_e = PetscRealPart(lid_max_ew[lid]);
913: PetscScalar vval;
914: PetscMPIInt max_pe = rank, pe;
916: if (lid_matched[lid]) vval = (PetscScalar)rank;
917: else if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ ii = matB->compressedrow.i;
918: n = ii[ix + 1] - ii[ix];
919: ap = matB->a + ii[ix];
920: idx = matB->j + ii[ix];
921: for (jj = 0; jj < n; jj++) {
922: PetscInt lidj = idx[jj];
923: if (cpcol_matched[lidj]) continue;
924: ew = PetscRealPart(ap[jj]);
925: v1_max_e = PetscRealPart(cpcol_max_ew[lidj]);
926: /* get the largest rank that has a max_e equal to this edge's weight */
927: if ((pe = cpcol_pe[idx[jj]]) > max_pe && ew > v1_max_e - PETSC_SMALL && ew > v0_max_e - PETSC_SMALL) max_pe = pe;
928: }
929: vval = (PetscScalar)max_pe;
930: }
931: VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);
932: }
933: VecAssemblyBegin(locMaxPE);
934: VecAssemblyEnd(locMaxPE);
936: VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD);
937: VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD);
938: VecGetArray(ghostMaxPE, &cpcol_max_pe);
939: VecRestoreArray(locMaxEdge, &lid_max_ew);
940: } /* deal with deleted ghost */
941: PetscInfo(a_Gmat, "\t %" PetscInt_FMT ".%" PetscInt_FMT ": %" PetscInt_FMT " active edges.\n", iter, sub_it, nactive_edges);
942: if (!nactive_edges) break;
943: } /* sub_it loop */
945: /* clean up iteration */
946: PetscFree(Edges);
947: if (mpimat) {
948: VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);
949: VecDestroy(&ghostMaxEdge);
950: VecRestoreArray(ghostMaxPE, &cpcol_max_pe);
951: VecDestroy(&ghostMaxPE);
952: PetscFree(cpcol_pe);
953: PetscFree(cpcol_matched);
954: }
956: VecDestroy(&locMaxEdge);
957: VecDestroy(&locMaxPE);
959: if (mpimat) VecRestoreArray(mpimat->lvec, &cpcol_gid);
961: /* create next G if needed */
962: if (iter == n_iter) { /* hard-wired test - need to look at fully surrounded nodes or something */
963: MatDestroy(&P);
964: MatDestroy(&cMat);
965: break;
966: } else {
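/* Added note: P gets an identity entry for each matched root gid0 and for each unmatched,
   non-isolated vertex, plus an entry (gid1, gid0) mapping the matched partner onto its root; the
   coarser graph is then the Galerkin product P^T cMat P, symmetrically rescaled by the inverse
   square root of its diagonal (a Jacobi-like normalization). */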
967: Vec diag;
968: /* add identity for unmatched vertices so they stay alive */
969: for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
970: if (!lid_matched[kk]) {
971: gid = kk + my0;
972: MatGetRow(cMat, gid, &n, NULL, NULL);
973: if (n > 1) MatSetValues(P, 1, &gid, 1, &gid, &one, INSERT_VALUES);
974: MatRestoreRow(cMat, gid, &n, NULL, NULL);
975: }
976: }
977: MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY);
978: MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY);
980: /* project to make new graph with collapsed edges */
981: MatPtAP(cMat, P, MAT_INITIAL_MATRIX, 1.0, &tMat);
982: MatDestroy(&P);
983: MatDestroy(&cMat);
984: cMat = tMat;
985: MatCreateVecs(cMat, &diag, NULL);
986: MatGetDiagonal(cMat, diag); /* effectively PCJACOBI */
987: VecReciprocal(diag);
988: VecSqrtAbs(diag);
989: MatDiagonalScale(cMat, diag, diag);
990: VecDestroy(&diag);
991: }
992: } /* coarsen iterator */
994: /* make a "fake" matrix that records, for each local aggregate root, its off-process member gids; it is attached to agg_llists with PetscCDSetMat() */
995: if (size > 1) {
996: Mat mat;
997: PetscCDIntNd *pos;
998: PetscInt gid, NN, MM, jj = 0, mxsz = 0;
1000: for (kk = 0; kk < nloc; kk++) {
1001: PetscCDSizeAt(agg_llists, kk, &jj);
1002: if (jj > mxsz) mxsz = jj;
1003: }
1004: MatGetSize(a_Gmat, &MM, &NN);
1005: if (mxsz > MM - nloc) mxsz = MM - nloc;
1007: MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat);
1009: for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
1010: /* for (pos=PetscCDGetHeadPos(agg_llists,kk) ; pos ; pos=PetscCDGetNextPos(agg_llists,kk,pos)) { */
1011: PetscCDGetHeadPos(agg_llists, kk, &pos);
1012: while (pos) {
1013: PetscInt gid1;
1014: PetscCDIntNdGetID(pos, &gid1);
1015: PetscCDGetNextPos(agg_llists, kk, &pos);
1017: if (gid1 < my0 || gid1 >= my0 + nloc) MatSetValues(mat, 1, &gid, 1, &gid1, &one, ADD_VALUES);
1018: }
1019: }
1020: MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
1021: MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
1022: PetscCDSetMat(agg_llists, mat);
1023: }
1025: PetscFree(lid_cprowID);
1026: PetscFree(lid_gid);
1027: PetscFree(lid_matched);
1028: PetscCDDestroy(deleted_list);
1029: return 0;
1030: }
1032: /*
1033: HEM coarsen, simple greedy.
1034: */
1035: static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse)
1036: {
1037: Mat mat = coarse->graph;
1039: if (!coarse->perm) {
1040: IS perm;
1041: PetscInt n, m;
1043: MatGetLocalSize(mat, &m, &n);
1044: ISCreateStride(PetscObjectComm((PetscObject)mat), m, 0, 1, &perm);
1045: MatCoarsenApply_HEM_private(perm, mat, &coarse->agg_lists);
1046: ISDestroy(&perm);
1047: } else {
1048: MatCoarsenApply_HEM_private(coarse->perm, mat, &coarse->agg_lists);
1049: }
1050: return 0;
1051: }
1053: static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse, PetscViewer viewer)
1054: {
1055: PetscMPIInt rank;
1056: PetscBool iascii;
1058: MPI_Comm_rank(PetscObjectComm((PetscObject)coarse), &rank);
1059: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
1060: if (iascii) {
1061: PetscViewerASCIIPushSynchronized(viewer);
1062: PetscViewerASCIISynchronizedPrintf(viewer, " [%d] HEM aggregator\n", rank);
1063: PetscViewerFlush(viewer);
1064: PetscViewerASCIIPopSynchronized(viewer);
1065: }
1066: return 0;
1067: }
1069: /*MC
1070: MATCOARSENHEM - A coarsener that uses heavy edge matching (HEM), a simple greedy coarsener
1072: Level: beginner
1074: .seealso: `MatCoarsen`, `MatCoarsenSetType()`, `MatCoarsenGetData()`, `MatCoarsenType`, `MatCoarsenCreate()`
1075: M*/
1077: PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse)
1078: {
1079: coarse->ops->apply = MatCoarsenApply_HEM;
1080: coarse->ops->view = MatCoarsenView_HEM;
1081: return 0;
1082: }
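/* Added illustrative sketch (not part of PETSc; MatCoarsenHEMExample is a hypothetical helper):
   driving this coarsener through the public MatCoarsen interface, given an assembled graph
   matrix G whose entries are the edge weights. */
static inline PetscErrorCode MatCoarsenHEMExample(Mat G, PetscCoarsenData **agg_lists)
{
  MatCoarsen crs;

  MatCoarsenCreate(PetscObjectComm((PetscObject)G), &crs);
  MatCoarsenSetType(crs, MATCOARSENHEM); /* selects MatCoarsenCreate_HEM above */
  MatCoarsenSetAdjacency(crs, G);        /* the graph whose weights drive the matching */
  MatCoarsenApply(crs);                  /* dispatches to MatCoarsenApply_HEM */
  MatCoarsenGetData(crs, agg_lists);     /* take over the aggregate lists */
  MatCoarsenDestroy(&crs);
  return 0;
}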