Actual source code: bddcprivate.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <petsc/private/pcbddcimpl.h>
3: #include <petsc/private/pcbddcprivateimpl.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
5: #include <petscdmplex.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/sfimpl.h>
8: #include <petsc/private/dmpleximpl.h>
9: #include <petscdmda.h>
11: static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
13: /* if range is true, it returns B s.t. span{B} = range(A)
14: if range is false, it returns B s.t. range(B) _|_ range(A) */
15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16: {
17: PetscScalar *uwork, *data, *U, ds = 0.;
18: PetscReal *sing;
19: PetscBLASInt bM, bN, lwork, lierr, di = 1;
20: PetscInt ulw, i, nr, nc, n;
21: #if defined(PETSC_USE_COMPLEX)
22: PetscReal *rwork2;
23: #endif
25: MatGetSize(A, &nr, &nc);
26: if (!nr || !nc) return 0;
28: /* workspace */
29: if (!work) {
30: ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
31: PetscMalloc1(ulw, &uwork);
32: } else {
33: ulw = lw;
34: uwork = work;
35: }
36: n = PetscMin(nr, nc);
37: if (!rwork) {
38: PetscMalloc1(n, &sing);
39: } else {
40: sing = rwork;
41: }
43: /* SVD */
44: PetscMalloc1(nr * nr, &U);
45: PetscBLASIntCast(nr, &bM);
46: PetscBLASIntCast(nc, &bN);
47: PetscBLASIntCast(ulw, &lwork);
48: MatDenseGetArray(A, &data);
49: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
50: #if !defined(PETSC_USE_COMPLEX)
51: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
52: #else
53: PetscMalloc1(5 * n, &rwork2);
54: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
55: PetscFree(rwork2);
56: #endif
57: PetscFPTrapPop();
59: MatDenseRestoreArray(A, &data);
60: for (i = 0; i < n; i++)
61: if (sing[i] < PETSC_SMALL) break;
62: if (!rwork) PetscFree(sing);
63: if (!work) PetscFree(uwork);
64: /* create B */
65: if (!range) {
66: MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B);
67: MatDenseGetArray(*B, &data);
68: PetscArraycpy(data, U + nr * i, (nr - i) * nr);
69: } else {
70: MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B);
71: MatDenseGetArray(*B, &data);
72: PetscArraycpy(data, U, i * nr);
73: }
74: MatDenseRestoreArray(*B, &data);
75: PetscFree(U);
76: return 0;
77: }
79: /* TODO REMOVE */
80: #if defined(PRINT_GDET)
81: static int inc = 0;
82: static int lev = 0;
83: #endif
85: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
86: {
87: Mat GE, GEd;
88: PetscInt rsize, csize, esize;
89: PetscScalar *ptr;
91: ISGetSize(edge, &esize);
92: if (!esize) return 0;
93: ISGetSize(extrow, &rsize);
94: ISGetSize(extcol, &csize);
96: /* gradients */
97: ptr = work + 5 * esize;
98: MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE);
99: MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins);
100: MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins);
101: MatDestroy(&GE);
103: /* constants */
104: ptr += rsize * csize;
105: MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd);
106: MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE);
107: MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd);
108: MatDestroy(&GE);
109: MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins);
110: MatDestroy(&GEd);
112: if (corners) {
113: Mat GEc;
114: const PetscScalar *vals;
115: PetscScalar v;
117: MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc);
118: MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd);
119: MatDenseGetArrayRead(GEd, &vals);
120: /* v = PetscAbsScalar(vals[0]) */;
121: v = 1.;
122: cvals[0] = vals[0] / v;
123: cvals[1] = vals[1] / v;
124: MatDenseRestoreArrayRead(GEd, &vals);
125: MatScale(*GKins, 1. / v);
126: #if defined(PRINT_GDET)
127: {
128: PetscViewer viewer;
129: char filename[256];
130: sprintf(filename, "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++);
131: PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer);
132: PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB);
133: PetscObjectSetName((PetscObject)GEc, "GEc");
134: MatView(GEc, viewer);
135: PetscObjectSetName((PetscObject)(*GKins), "GK");
136: MatView(*GKins, viewer);
137: PetscObjectSetName((PetscObject)GEd, "Gproj");
138: MatView(GEd, viewer);
139: PetscViewerDestroy(&viewer);
140: }
141: #endif
142: MatDestroy(&GEd);
143: MatDestroy(&GEc);
144: }
146: return 0;
147: }
149: PetscErrorCode PCBDDCNedelecSupport(PC pc)
150: {
151: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
152: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
153: Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
154: Vec tvec;
155: PetscSF sfv;
156: ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
157: MPI_Comm comm;
158: IS lned, primals, allprimals, nedfieldlocal;
159: IS *eedges, *extrows, *extcols, *alleedges;
160: PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
161: PetscScalar *vals, *work;
162: PetscReal *rwork;
163: const PetscInt *idxs, *ii, *jj, *iit, *jjt;
164: PetscInt ne, nv, Lv, order, n, field;
165: PetscInt n_neigh, *neigh, *n_shared, **shared;
166: PetscInt i, j, extmem, cum, maxsize, nee;
167: PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs;
168: PetscInt *sfvleaves, *sfvroots;
169: PetscInt *corners, *cedges;
170: PetscInt *ecount, **eneighs, *vcount, **vneighs;
171: PetscInt *emarks;
172: PetscBool print, eerr, done, lrc[2], conforming, global, singular, setprimal;
174: /* If the discrete gradient is defined for a subset of dofs and global is true,
175: it assumes G is given in global ordering for all the dofs.
176: Otherwise, the ordering is global for the Nedelec field */
177: order = pcbddc->nedorder;
178: conforming = pcbddc->conforming;
179: field = pcbddc->nedfield;
180: global = pcbddc->nedglobal;
181: setprimal = PETSC_FALSE;
182: print = PETSC_FALSE;
183: singular = PETSC_FALSE;
185: /* Command line customization */
186: PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
187: PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL);
188: PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL);
189: PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL);
190: /* print debug info TODO: to be removed */
191: PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL);
192: PetscOptionsEnd();
194: /* Return if there are no edges in the decomposition and the problem is not singular */
195: MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL);
196: ISLocalToGlobalMappingGetSize(al2g, &n);
197: PetscObjectGetComm((PetscObject)pc, &comm);
198: if (!singular) {
199: VecGetArrayRead(matis->counter, (const PetscScalar **)&vals);
200: lrc[0] = PETSC_FALSE;
201: for (i = 0; i < n; i++) {
202: if (PetscRealPart(vals[i]) > 2.) {
203: lrc[0] = PETSC_TRUE;
204: break;
205: }
206: }
207: VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals);
208: MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm);
209: if (!lrc[1]) return 0;
210: }
212: /* Get Nedelec field */
214: if (pcbddc->n_ISForDofsLocal && field >= 0) {
215: PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
216: nedfieldlocal = pcbddc->ISForDofsLocal[field];
217: ISGetLocalSize(nedfieldlocal, &ne);
218: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
219: ne = n;
220: nedfieldlocal = NULL;
221: global = PETSC_TRUE;
222: } else if (field == PETSC_DECIDE) {
223: PetscInt rst, ren, *idx;
225: PetscArrayzero(matis->sf_leafdata, n);
226: PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n);
227: MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren);
228: for (i = rst; i < ren; i++) {
229: PetscInt nc;
231: MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL);
232: if (nc > 1) matis->sf_rootdata[i - rst] = 1;
233: MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL);
234: }
235: PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
236: PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
237: PetscMalloc1(n, &idx);
238: for (i = 0, ne = 0; i < n; i++)
239: if (matis->sf_leafdata[i]) idx[ne++] = i;
240: ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal);
241: } else {
242: SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
243: }
245: /* Sanity checks */
250: /* Just set primal dofs and return */
251: if (setprimal) {
252: IS enedfieldlocal;
253: PetscInt *eidxs;
255: PetscMalloc1(ne, &eidxs);
256: VecGetArrayRead(matis->counter, (const PetscScalar **)&vals);
257: if (nedfieldlocal) {
258: ISGetIndices(nedfieldlocal, &idxs);
259: for (i = 0, cum = 0; i < ne; i++) {
260: if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
261: }
262: ISRestoreIndices(nedfieldlocal, &idxs);
263: } else {
264: for (i = 0, cum = 0; i < ne; i++) {
265: if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
266: }
267: }
268: VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals);
269: ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal);
270: PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal);
271: PetscFree(eidxs);
272: ISDestroy(&nedfieldlocal);
273: ISDestroy(&enedfieldlocal);
274: return 0;
275: }
277: /* Compute some l2g maps */
278: if (nedfieldlocal) {
279: IS is;
281: /* need to map from the local Nedelec field to local numbering */
282: ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g);
283: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
284: ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is);
285: ISLocalToGlobalMappingCreateIS(is, &al2g);
286: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
287: if (global) {
288: PetscObjectReference((PetscObject)al2g);
289: el2g = al2g;
290: } else {
291: IS gis;
293: ISRenumber(is, NULL, NULL, &gis);
294: ISLocalToGlobalMappingCreateIS(gis, &el2g);
295: ISDestroy(&gis);
296: }
297: ISDestroy(&is);
298: } else {
299: /* restore default */
300: pcbddc->nedfield = -1;
301: /* one ref for the destruction of al2g, one for el2g */
302: PetscObjectReference((PetscObject)al2g);
303: PetscObjectReference((PetscObject)al2g);
304: el2g = al2g;
305: fl2g = NULL;
306: }
308: /* Start communication to drop connections for interior edges (for cc analysis only) */
309: PetscArrayzero(matis->sf_leafdata, n);
310: PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n);
311: if (nedfieldlocal) {
312: ISGetIndices(nedfieldlocal, &idxs);
313: for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
314: ISRestoreIndices(nedfieldlocal, &idxs);
315: } else {
316: for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
317: }
318: PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM);
319: PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM);
321: if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
322: MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G);
323: MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE);
324: if (global) {
325: PetscInt rst;
327: MatGetOwnershipRange(G, &rst, NULL);
328: for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
329: if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
330: }
331: MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE);
332: MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL);
333: } else {
334: PetscInt *tbz;
336: PetscMalloc1(ne, &tbz);
337: PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
338: PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
339: ISGetIndices(nedfieldlocal, &idxs);
340: for (i = 0, cum = 0; i < ne; i++)
341: if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
342: ISRestoreIndices(nedfieldlocal, &idxs);
343: ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz);
344: MatZeroRows(G, cum, tbz, 0., NULL, NULL);
345: PetscFree(tbz);
346: }
347: } else { /* we need the entire G to infer the nullspace */
348: PetscObjectReference((PetscObject)pcbddc->discretegradient);
349: G = pcbddc->discretegradient;
350: }
352: /* Extract subdomain relevant rows of G */
353: ISLocalToGlobalMappingGetIndices(el2g, &idxs);
354: ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned);
355: MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall);
356: ISLocalToGlobalMappingRestoreIndices(el2g, &idxs);
357: ISDestroy(&lned);
358: MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis);
359: MatDestroy(&lGall);
360: MatISGetLocalMat(lGis, &lG);
362: /* SF for nodal dofs communications */
363: MatGetLocalSize(G, NULL, &Lv);
364: MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g);
365: PetscObjectReference((PetscObject)vl2g);
366: ISLocalToGlobalMappingGetSize(vl2g, &nv);
367: PetscSFCreate(comm, &sfv);
368: ISLocalToGlobalMappingGetIndices(vl2g, &idxs);
369: PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs);
370: ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs);
371: i = singular ? 2 : 1;
372: PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots);
374: /* Destroy temporary G created in MATIS format and modified G */
375: PetscObjectReference((PetscObject)lG);
376: MatDestroy(&lGis);
377: MatDestroy(&G);
379: if (print) {
380: PetscObjectSetName((PetscObject)lG, "initial_lG");
381: MatView(lG, NULL);
382: }
384: /* Save lG for values insertion in change of basis */
385: MatDuplicate(lG, MAT_COPY_VALUES, &lGinit);
387: /* Analyze the edge-nodes connections (duplicate lG) */
388: MatDuplicate(lG, MAT_COPY_VALUES, &lGe);
389: MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE);
390: PetscBTCreate(nv, &btv);
391: PetscBTCreate(ne, &bte);
392: PetscBTCreate(ne, &btb);
393: PetscBTCreate(ne, &btbd);
394: PetscBTCreate(nv, &btvcand);
395: /* need to import the boundary specification to ensure the
396: proper detection of coarse edges' endpoints */
397: if (pcbddc->DirichletBoundariesLocal) {
398: IS is;
400: if (fl2g) {
401: ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is);
402: } else {
403: is = pcbddc->DirichletBoundariesLocal;
404: }
405: ISGetLocalSize(is, &cum);
406: ISGetIndices(is, &idxs);
407: for (i = 0; i < cum; i++) {
408: if (idxs[i] >= 0) {
409: PetscBTSet(btb, idxs[i]);
410: PetscBTSet(btbd, idxs[i]);
411: }
412: }
413: ISRestoreIndices(is, &idxs);
414: if (fl2g) ISDestroy(&is);
415: }
416: if (pcbddc->NeumannBoundariesLocal) {
417: IS is;
419: if (fl2g) {
420: ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is);
421: } else {
422: is = pcbddc->NeumannBoundariesLocal;
423: }
424: ISGetLocalSize(is, &cum);
425: ISGetIndices(is, &idxs);
426: for (i = 0; i < cum; i++) {
427: if (idxs[i] >= 0) PetscBTSet(btb, idxs[i]);
428: }
429: ISRestoreIndices(is, &idxs);
430: if (fl2g) ISDestroy(&is);
431: }
433: /* Count neighs per dof */
434: ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, &eneighs);
435: ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, &vneighs);
437: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
438: for proper detection of coarse edges' endpoints */
439: PetscBTCreate(ne, &btee);
440: for (i = 0; i < ne; i++) {
441: if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscBTSet(btee, i);
442: }
443: PetscMalloc1(ne, &marks);
444: if (!conforming) {
445: MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt);
446: MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
447: }
448: MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
449: MatSeqAIJGetArray(lGe, &vals);
450: cum = 0;
451: for (i = 0; i < ne; i++) {
452: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
453: if (!PetscBTLookup(btee, i)) {
454: marks[cum++] = i;
455: continue;
456: }
457: /* set badly connected edge dofs as primal */
458: if (!conforming) {
459: if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
460: marks[cum++] = i;
461: PetscBTSet(bte, i);
462: for (j = ii[i]; j < ii[i + 1]; j++) PetscBTSet(btv, jj[j]);
463: } else {
464: /* every edge dofs should be connected trough a certain number of nodal dofs
465: to other edge dofs belonging to coarse edges
466: - at most 2 endpoints
467: - order-1 interior nodal dofs
468: - no undefined nodal dofs (nconn < order)
469: */
470: PetscInt ends = 0, ints = 0, undef = 0;
471: for (j = ii[i]; j < ii[i + 1]; j++) {
472: PetscInt v = jj[j], k;
473: PetscInt nconn = iit[v + 1] - iit[v];
474: for (k = iit[v]; k < iit[v + 1]; k++)
475: if (!PetscBTLookup(btee, jjt[k])) nconn--;
476: if (nconn > order) ends++;
477: else if (nconn == order) ints++;
478: else undef++;
479: }
480: if (undef || ends > 2 || ints != order - 1) {
481: marks[cum++] = i;
482: PetscBTSet(bte, i);
483: for (j = ii[i]; j < ii[i + 1]; j++) PetscBTSet(btv, jj[j]);
484: }
485: }
486: }
487: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
488: if (!order && ii[i + 1] != ii[i]) {
489: PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
490: for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
491: }
492: }
493: PetscBTDestroy(&btee);
494: MatSeqAIJRestoreArray(lGe, &vals);
495: MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
496: if (!conforming) {
497: MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
498: MatDestroy(&lGt);
499: }
500: MatZeroRows(lGe, cum, marks, 0., NULL, NULL);
502: /* identify splitpoints and corner candidates */
503: MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt);
504: if (print) {
505: PetscObjectSetName((PetscObject)lGe, "edgerestr_lG");
506: MatView(lGe, NULL);
507: PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt");
508: MatView(lGt, NULL);
509: }
510: MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
511: MatSeqAIJGetArray(lGt, &vals);
512: for (i = 0; i < nv; i++) {
513: PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
514: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
515: if (!order) { /* variable order */
516: PetscReal vorder = 0.;
518: for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
519: test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
521: ord = 1;
522: }
523: PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord);
524: for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
525: if (PetscBTLookup(btbd, jj[j])) {
526: bdir = PETSC_TRUE;
527: break;
528: }
529: if (vc != ecount[jj[j]]) {
530: sneighs = PETSC_FALSE;
531: } else {
532: PetscInt k, *vn = vneighs[i], *en = eneighs[jj[j]];
533: for (k = 0; k < vc; k++) {
534: if (vn[k] != en[k]) {
535: sneighs = PETSC_FALSE;
536: break;
537: }
538: }
539: }
540: }
541: if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
542: if (print) PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]);
543: PetscBTSet(btv, i);
544: } else if (test == ord) {
545: if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
546: if (print) PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i);
547: PetscBTSet(btv, i);
548: } else {
549: if (print) PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i);
550: PetscBTSet(btvcand, i);
551: }
552: }
553: }
554: ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs);
555: ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs);
556: PetscBTDestroy(&btbd);
558: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
559: if (order != 1) {
560: if (print) PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n");
561: MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
562: for (i = 0; i < nv; i++) {
563: if (PetscBTLookup(btvcand, i)) {
564: PetscBool found = PETSC_FALSE;
565: for (j = ii[i]; j < ii[i + 1] && !found; j++) {
566: PetscInt k, e = jj[j];
567: if (PetscBTLookup(bte, e)) continue;
568: for (k = iit[e]; k < iit[e + 1]; k++) {
569: PetscInt v = jjt[k];
570: if (v != i && PetscBTLookup(btvcand, v)) {
571: found = PETSC_TRUE;
572: break;
573: }
574: }
575: }
576: if (!found) {
577: if (print) PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i);
578: PetscBTClear(btvcand, i);
579: } else {
580: if (print) PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i);
581: }
582: }
583: }
584: MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
585: }
586: MatSeqAIJRestoreArray(lGt, &vals);
587: MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
588: MatDestroy(&lGe);
590: /* Get the local G^T explicitly */
591: MatDestroy(&lGt);
592: MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt);
593: MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE);
595: /* Mark interior nodal dofs */
596: ISLocalToGlobalMappingGetInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared);
597: PetscBTCreate(nv, &btvi);
598: for (i = 1; i < n_neigh; i++) {
599: for (j = 0; j < n_shared[i]; j++) PetscBTSet(btvi, shared[i][j]);
600: }
601: ISLocalToGlobalMappingRestoreInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared);
603: /* communicate corners and splitpoints */
604: PetscMalloc1(nv, &vmarks);
605: PetscArrayzero(sfvleaves, nv);
606: PetscArrayzero(sfvroots, Lv);
607: for (i = 0; i < nv; i++)
608: if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
610: if (print) {
611: IS tbz;
613: cum = 0;
614: for (i = 0; i < nv; i++)
615: if (sfvleaves[i]) vmarks[cum++] = i;
617: ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz);
618: PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local");
619: ISView(tbz, NULL);
620: ISDestroy(&tbz);
621: }
623: PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM);
624: PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM);
625: PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE);
626: PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE);
628: /* Zero rows of lGt corresponding to identified corners
629: and interior nodal dofs */
630: cum = 0;
631: for (i = 0; i < nv; i++) {
632: if (sfvleaves[i]) {
633: vmarks[cum++] = i;
634: PetscBTSet(btv, i);
635: }
636: if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
637: }
638: PetscBTDestroy(&btvi);
639: if (print) {
640: IS tbz;
642: ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz);
643: PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior");
644: ISView(tbz, NULL);
645: ISDestroy(&tbz);
646: }
647: MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL);
648: PetscFree(vmarks);
649: PetscSFDestroy(&sfv);
650: PetscFree2(sfvleaves, sfvroots);
652: /* Recompute G */
653: MatDestroy(&lG);
654: MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG);
655: if (print) {
656: PetscObjectSetName((PetscObject)lG, "used_lG");
657: MatView(lG, NULL);
658: PetscObjectSetName((PetscObject)lGt, "used_lGt");
659: MatView(lGt, NULL);
660: }
662: /* Get primal dofs (if any) */
663: cum = 0;
664: for (i = 0; i < ne; i++) {
665: if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
666: }
667: if (fl2g) ISLocalToGlobalMappingApply(fl2g, cum, marks, marks);
668: ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals);
669: if (print) {
670: PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs");
671: ISView(primals, NULL);
672: }
673: PetscBTDestroy(&bte);
674: /* TODO: what if the user passed in some of them ? */
675: PCBDDCSetPrimalVerticesLocalIS(pc, primals);
676: ISDestroy(&primals);
678: /* Compute edge connectivity */
679: PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_");
681: /* Symbolic conn = lG*lGt */
682: MatProductCreate(lG, lGt, NULL, &conn);
683: MatProductSetType(conn, MATPRODUCT_AB);
684: MatProductSetAlgorithm(conn, "default");
685: MatProductSetFill(conn, PETSC_DEFAULT);
686: PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_");
687: MatProductSetFromOptions(conn);
688: MatProductSymbolic(conn);
690: MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
691: if (fl2g) {
692: PetscBT btf;
693: PetscInt *iia, *jja, *iiu, *jju;
694: PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
696: /* create CSR for all local dofs */
697: PetscMalloc1(n + 1, &iia);
698: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
700: iiu = pcbddc->mat_graph->xadj;
701: jju = pcbddc->mat_graph->adjncy;
702: } else if (pcbddc->use_local_adj) {
703: rest = PETSC_TRUE;
704: MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done);
705: } else {
706: free = PETSC_TRUE;
707: PetscMalloc2(n + 1, &iiu, n, &jju);
708: iiu[0] = 0;
709: for (i = 0; i < n; i++) {
710: iiu[i + 1] = i + 1;
711: jju[i] = -1;
712: }
713: }
715: /* import sizes of CSR */
716: iia[0] = 0;
717: for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
719: /* overwrite entries corresponding to the Nedelec field */
720: PetscBTCreate(n, &btf);
721: ISGetIndices(nedfieldlocal, &idxs);
722: for (i = 0; i < ne; i++) {
723: PetscBTSet(btf, idxs[i]);
724: iia[idxs[i] + 1] = ii[i + 1] - ii[i];
725: }
727: /* iia in CSR */
728: for (i = 0; i < n; i++) iia[i + 1] += iia[i];
730: /* jja in CSR */
731: PetscMalloc1(iia[n], &jja);
732: for (i = 0; i < n; i++)
733: if (!PetscBTLookup(btf, i))
734: for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
736: /* map edge dofs connectivity */
737: if (jj) {
738: ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj);
739: for (i = 0; i < ne; i++) {
740: PetscInt e = idxs[i];
741: for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
742: }
743: }
744: ISRestoreIndices(nedfieldlocal, &idxs);
745: PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER);
746: if (rest) MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done);
747: if (free) PetscFree2(iiu, jju);
748: PetscBTDestroy(&btf);
749: } else {
750: PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER);
751: }
753: /* Analyze interface for edge dofs */
754: PCBDDCAnalyzeInterface(pc);
755: pcbddc->mat_graph->twodim = PETSC_FALSE;
757: /* Get coarse edges in the edge space */
758: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals);
759: MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
761: if (fl2g) {
762: ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals);
763: PetscMalloc1(nee, &eedges);
764: for (i = 0; i < nee; i++) ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]);
765: } else {
766: eedges = alleedges;
767: primals = allprimals;
768: }
770: /* Mark fine edge dofs with their coarse edge id */
771: PetscArrayzero(marks, ne);
772: ISGetLocalSize(primals, &cum);
773: ISGetIndices(primals, &idxs);
774: for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
775: ISRestoreIndices(primals, &idxs);
776: if (print) {
777: PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs");
778: ISView(primals, NULL);
779: }
781: maxsize = 0;
782: for (i = 0; i < nee; i++) {
783: PetscInt size, mark = i + 1;
785: ISGetLocalSize(eedges[i], &size);
786: ISGetIndices(eedges[i], &idxs);
787: for (j = 0; j < size; j++) marks[idxs[j]] = mark;
788: ISRestoreIndices(eedges[i], &idxs);
789: maxsize = PetscMax(maxsize, size);
790: }
792: /* Find coarse edge endpoints */
793: MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
794: MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
795: for (i = 0; i < nee; i++) {
796: PetscInt mark = i + 1, size;
798: ISGetLocalSize(eedges[i], &size);
799: if (!size && nedfieldlocal) continue;
801: ISGetIndices(eedges[i], &idxs);
802: if (print) {
803: PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i);
804: ISView(eedges[i], NULL);
805: }
806: for (j = 0; j < size; j++) {
807: PetscInt k, ee = idxs[j];
808: if (print) PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee);
809: for (k = ii[ee]; k < ii[ee + 1]; k++) {
810: if (print) PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k]);
811: if (PetscBTLookup(btv, jj[k])) {
812: if (print) PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k]);
813: } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
814: PetscInt k2;
815: PetscBool corner = PETSC_FALSE;
816: for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
817: if (print) PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]));
818: /* it's a corner if either is connected with an edge dof belonging to a different cc or
819: if the edge dof lie on the natural part of the boundary */
820: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
821: corner = PETSC_TRUE;
822: break;
823: }
824: }
825: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
826: if (print) PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k]);
827: PetscBTSet(btv, jj[k]);
828: } else {
829: if (print) PetscPrintf(PETSC_COMM_SELF, " no corners found\n");
830: }
831: }
832: }
833: }
834: ISRestoreIndices(eedges[i], &idxs);
835: }
836: MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
837: MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
838: PetscBTDestroy(&btb);
840: /* Reset marked primal dofs */
841: ISGetLocalSize(primals, &cum);
842: ISGetIndices(primals, &idxs);
843: for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
844: ISRestoreIndices(primals, &idxs);
846: /* Now use the initial lG */
847: MatDestroy(&lG);
848: MatDestroy(&lGt);
849: lG = lGinit;
850: MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt);
852: /* Compute extended cols indices */
853: PetscBTCreate(nv, &btvc);
854: PetscBTCreate(nee, &bter);
855: MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
856: MatSeqAIJGetMaxRowNonzeros(lG, &i);
857: i *= maxsize;
858: PetscCalloc1(nee, &extcols);
859: PetscMalloc2(i, &extrow, i, &gidxs);
860: eerr = PETSC_FALSE;
861: for (i = 0; i < nee; i++) {
862: PetscInt size, found = 0;
864: cum = 0;
865: ISGetLocalSize(eedges[i], &size);
866: if (!size && nedfieldlocal) continue;
868: ISGetIndices(eedges[i], &idxs);
869: PetscBTMemzero(nv, btvc);
870: for (j = 0; j < size; j++) {
871: PetscInt k, ee = idxs[j];
872: for (k = ii[ee]; k < ii[ee + 1]; k++) {
873: PetscInt vv = jj[k];
874: if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
875: else if (!PetscBTLookupSet(btvc, vv)) found++;
876: }
877: }
878: ISRestoreIndices(eedges[i], &idxs);
879: PetscSortRemoveDupsInt(&cum, extrow);
880: ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs);
881: PetscSortIntWithArray(cum, gidxs, extrow);
882: ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]);
883: /* it may happen that endpoints are not defined at this point
884: if it is the case, mark this edge for a second pass */
885: if (cum != size - 1 || found != 2) {
886: PetscBTSet(bter, i);
887: if (print) {
888: PetscObjectSetName((PetscObject)eedges[i], "error_edge");
889: ISView(eedges[i], NULL);
890: PetscObjectSetName((PetscObject)extcols[i], "error_extcol");
891: ISView(extcols[i], NULL);
892: }
893: eerr = PETSC_TRUE;
894: }
895: }
897: MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm);
898: if (done) {
899: PetscInt *newprimals;
901: PetscMalloc1(ne, &newprimals);
902: ISGetLocalSize(primals, &cum);
903: ISGetIndices(primals, &idxs);
904: PetscArraycpy(newprimals, idxs, cum);
905: ISRestoreIndices(primals, &idxs);
906: MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
907: if (print) PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]);
908: for (i = 0; i < nee; i++) {
909: PetscBool has_candidates = PETSC_FALSE;
910: if (PetscBTLookup(bter, i)) {
911: PetscInt size, mark = i + 1;
913: ISGetLocalSize(eedges[i], &size);
914: ISGetIndices(eedges[i], &idxs);
915: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
916: for (j = 0; j < size; j++) {
917: PetscInt k, ee = idxs[j];
918: if (print) PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]);
919: for (k = ii[ee]; k < ii[ee + 1]; k++) {
920: /* set all candidates located on the edge as corners */
921: if (PetscBTLookup(btvcand, jj[k])) {
922: PetscInt k2, vv = jj[k];
923: has_candidates = PETSC_TRUE;
924: if (print) PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv);
925: PetscBTSet(btv, vv);
926: /* set all edge dofs connected to candidate as primals */
927: for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
928: if (marks[jjt[k2]] == mark) {
929: PetscInt k3, ee2 = jjt[k2];
930: if (print) PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2);
931: newprimals[cum++] = ee2;
932: /* finally set the new corners */
933: for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
934: if (print) PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]);
935: PetscBTSet(btv, jj[k3]);
936: }
937: }
938: }
939: } else {
940: if (print) PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k]);
941: }
942: }
943: }
944: if (!has_candidates) { /* circular edge */
945: PetscInt k, ee = idxs[0], *tmarks;
947: PetscCalloc1(ne, &tmarks);
948: if (print) PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i);
949: for (k = ii[ee]; k < ii[ee + 1]; k++) {
950: PetscInt k2;
951: if (print) PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k]);
952: PetscBTSet(btv, jj[k]);
953: for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
954: }
955: for (j = 0; j < size; j++) {
956: if (tmarks[idxs[j]] > 1) {
957: if (print) PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]);
958: newprimals[cum++] = idxs[j];
959: }
960: }
961: PetscFree(tmarks);
962: }
963: ISRestoreIndices(eedges[i], &idxs);
964: }
965: ISDestroy(&extcols[i]);
966: }
967: PetscFree(extcols);
968: MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
969: PetscSortRemoveDupsInt(&cum, newprimals);
970: if (fl2g) {
971: ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals);
972: ISDestroy(&primals);
973: for (i = 0; i < nee; i++) ISDestroy(&eedges[i]);
974: PetscFree(eedges);
975: }
976: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals);
977: ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals);
978: PetscFree(newprimals);
979: PCBDDCSetPrimalVerticesLocalIS(pc, primals);
980: ISDestroy(&primals);
981: PCBDDCAnalyzeInterface(pc);
982: pcbddc->mat_graph->twodim = PETSC_FALSE;
983: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals);
984: if (fl2g) {
985: ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals);
986: PetscMalloc1(nee, &eedges);
987: for (i = 0; i < nee; i++) ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]);
988: } else {
989: eedges = alleedges;
990: primals = allprimals;
991: }
992: PetscCalloc1(nee, &extcols);
994: /* Mark again */
995: PetscArrayzero(marks, ne);
996: for (i = 0; i < nee; i++) {
997: PetscInt size, mark = i + 1;
999: ISGetLocalSize(eedges[i], &size);
1000: ISGetIndices(eedges[i], &idxs);
1001: for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1002: ISRestoreIndices(eedges[i], &idxs);
1003: }
1004: if (print) {
1005: PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass");
1006: ISView(primals, NULL);
1007: }
1009: /* Recompute extended cols */
1010: eerr = PETSC_FALSE;
1011: for (i = 0; i < nee; i++) {
1012: PetscInt size;
1014: cum = 0;
1015: ISGetLocalSize(eedges[i], &size);
1016: if (!size && nedfieldlocal) continue;
1018: ISGetIndices(eedges[i], &idxs);
1019: for (j = 0; j < size; j++) {
1020: PetscInt k, ee = idxs[j];
1021: for (k = ii[ee]; k < ii[ee + 1]; k++)
1022: if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1023: }
1024: ISRestoreIndices(eedges[i], &idxs);
1025: PetscSortRemoveDupsInt(&cum, extrow);
1026: ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs);
1027: PetscSortIntWithArray(cum, gidxs, extrow);
1028: ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]);
1029: if (cum != size - 1) {
1030: if (print) {
1031: PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass");
1032: ISView(eedges[i], NULL);
1033: PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass");
1034: ISView(extcols[i], NULL);
1035: }
1036: eerr = PETSC_TRUE;
1037: }
1038: }
1039: }
1040: MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1041: PetscFree2(extrow, gidxs);
1042: PetscBTDestroy(&bter);
1043: if (print) PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF);
1044: /* an error should not occur at this point */
1047: /* Check the number of endpoints */
1048: MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1049: PetscMalloc1(2 * nee, &corners);
1050: PetscMalloc1(nee, &cedges);
1051: for (i = 0; i < nee; i++) {
1052: PetscInt size, found = 0, gc[2];
1054: /* init with defaults */
1055: cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1056: ISGetLocalSize(eedges[i], &size);
1057: if (!size && nedfieldlocal) continue;
1059: ISGetIndices(eedges[i], &idxs);
1060: PetscBTMemzero(nv, btvc);
1061: for (j = 0; j < size; j++) {
1062: PetscInt k, ee = idxs[j];
1063: for (k = ii[ee]; k < ii[ee + 1]; k++) {
1064: PetscInt vv = jj[k];
1065: if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1067: corners[i * 2 + found++] = vv;
1068: }
1069: }
1070: }
1071: if (found != 2) {
1072: PetscInt e;
1073: if (fl2g) {
1074: ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e);
1075: } else {
1076: e = idxs[0];
1077: }
1078: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1079: }
1081: /* get primal dof index on this coarse edge */
1082: ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc);
1083: if (gc[0] > gc[1]) {
1084: PetscInt swap = corners[2 * i];
1085: corners[2 * i] = corners[2 * i + 1];
1086: corners[2 * i + 1] = swap;
1087: }
1088: cedges[i] = idxs[size - 1];
1089: ISRestoreIndices(eedges[i], &idxs);
1090: if (print) PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]);
1091: }
1092: MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1093: PetscBTDestroy(&btvc);
1095: if (PetscDefined(USE_DEBUG)) {
1096: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1097: not interfere with neighbouring coarse edges */
1098: PetscMalloc1(nee + 1, &emarks);
1099: MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1100: for (i = 0; i < nv; i++) {
1101: PetscInt emax = 0, eemax = 0;
1103: if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1104: PetscArrayzero(emarks, nee + 1);
1105: for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1106: for (j = 1; j < nee + 1; j++) {
1107: if (emax < emarks[j]) {
1108: emax = emarks[j];
1109: eemax = j;
1110: }
1111: }
1112: /* not relevant for edges */
1113: if (!eemax) continue;
1115: for (j = ii[i]; j < ii[i + 1]; j++) {
1117: }
1118: }
1119: PetscFree(emarks);
1120: MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1121: }
1123: /* Compute extended rows indices for edge blocks of the change of basis */
1124: MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1125: MatSeqAIJGetMaxRowNonzeros(lGt, &extmem);
1126: extmem *= maxsize;
1127: PetscMalloc1(extmem * nee, &extrow);
1128: PetscMalloc1(nee, &extrows);
1129: PetscCalloc1(nee, &extrowcum);
1130: for (i = 0; i < nv; i++) {
1131: PetscInt mark = 0, size, start;
1133: if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1134: for (j = ii[i]; j < ii[i + 1]; j++)
1135: if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1137: /* not relevant */
1138: if (!mark) continue;
1140: /* import extended row */
1141: mark--;
1142: start = mark * extmem + extrowcum[mark];
1143: size = ii[i + 1] - ii[i];
1145: PetscArraycpy(extrow + start, jj + ii[i], size);
1146: extrowcum[mark] += size;
1147: }
1148: MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1149: MatDestroy(&lGt);
1150: PetscFree(marks);
1152: /* Compress extrows */
1153: cum = 0;
1154: for (i = 0; i < nee; i++) {
1155: PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1156: PetscSortRemoveDupsInt(&size, start);
1157: ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]);
1158: cum = PetscMax(cum, size);
1159: }
1160: PetscFree(extrowcum);
1161: PetscBTDestroy(&btv);
1162: PetscBTDestroy(&btvcand);
1164: /* Workspace for lapack inner calls and VecSetValues */
1165: PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork);
1167: /* Create change of basis matrix (preallocation can be improved) */
1168: MatCreate(comm, &T);
1169: MatSetSizes(T, pc->pmat->rmap->n, pc->pmat->rmap->n, pc->pmat->rmap->N, pc->pmat->rmap->N);
1170: MatSetType(T, MATAIJ);
1171: MatSeqAIJSetPreallocation(T, 10, NULL);
1172: MatMPIAIJSetPreallocation(T, 10, NULL, 10, NULL);
1173: MatSetLocalToGlobalMapping(T, al2g, al2g);
1174: MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
1175: MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE);
1176: ISLocalToGlobalMappingDestroy(&al2g);
1178: /* Defaults to identity */
1179: MatCreateVecs(pc->pmat, &tvec, NULL);
1180: VecSet(tvec, 1.0);
1181: MatDiagonalSet(T, tvec, INSERT_VALUES);
1182: VecDestroy(&tvec);
1184: /* Create discrete gradient for the coarser level if needed */
1185: MatDestroy(&pcbddc->nedcG);
1186: ISDestroy(&pcbddc->nedclocal);
1187: if (pcbddc->current_level < pcbddc->max_levels) {
1188: ISLocalToGlobalMapping cel2g, cvl2g;
1189: IS wis, gwis;
1190: PetscInt cnv, cne;
1192: ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis);
1193: if (fl2g) {
1194: ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal);
1195: } else {
1196: PetscObjectReference((PetscObject)wis);
1197: pcbddc->nedclocal = wis;
1198: }
1199: ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis);
1200: ISDestroy(&wis);
1201: ISRenumber(gwis, NULL, &cne, &wis);
1202: ISLocalToGlobalMappingCreateIS(wis, &cel2g);
1203: ISDestroy(&wis);
1204: ISDestroy(&gwis);
1206: ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis);
1207: ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis);
1208: ISDestroy(&wis);
1209: ISRenumber(gwis, NULL, &cnv, &wis);
1210: ISLocalToGlobalMappingCreateIS(wis, &cvl2g);
1211: ISDestroy(&wis);
1212: ISDestroy(&gwis);
1214: MatCreate(comm, &pcbddc->nedcG);
1215: MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv);
1216: MatSetType(pcbddc->nedcG, MATAIJ);
1217: MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL);
1218: MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL);
1219: MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g);
1220: ISLocalToGlobalMappingDestroy(&cel2g);
1221: ISLocalToGlobalMappingDestroy(&cvl2g);
1222: }
1223: ISLocalToGlobalMappingDestroy(&vl2g);
1225: #if defined(PRINT_GDET)
1226: inc = 0;
1227: lev = pcbddc->current_level;
1228: #endif
1230: /* Insert values in the change of basis matrix */
1231: for (i = 0; i < nee; i++) {
1232: Mat Gins = NULL, GKins = NULL;
1233: IS cornersis = NULL;
1234: PetscScalar cvals[2];
1236: if (pcbddc->nedcG) ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis);
1237: PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork);
1238: if (Gins && GKins) {
1239: const PetscScalar *data;
1240: const PetscInt *rows, *cols;
1241: PetscInt nrh, nch, nrc, ncc;
1243: ISGetIndices(eedges[i], &cols);
1244: /* H1 */
1245: ISGetIndices(extrows[i], &rows);
1246: MatGetSize(Gins, &nrh, &nch);
1247: MatDenseGetArrayRead(Gins, &data);
1248: MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES);
1249: MatDenseRestoreArrayRead(Gins, &data);
1250: ISRestoreIndices(extrows[i], &rows);
1251: /* complement */
1252: MatGetSize(GKins, &nrc, &ncc);
1256: MatDenseGetArrayRead(GKins, &data);
1257: MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES);
1258: MatDenseRestoreArrayRead(GKins, &data);
1260: /* coarse discrete gradient */
1261: if (pcbddc->nedcG) {
1262: PetscInt cols[2];
1264: cols[0] = 2 * i;
1265: cols[1] = 2 * i + 1;
1266: MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES);
1267: }
1268: ISRestoreIndices(eedges[i], &cols);
1269: }
1270: ISDestroy(&extrows[i]);
1271: ISDestroy(&extcols[i]);
1272: ISDestroy(&cornersis);
1273: MatDestroy(&Gins);
1274: MatDestroy(&GKins);
1275: }
1276: ISLocalToGlobalMappingDestroy(&el2g);
1278: /* Start assembling */
1279: MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY);
1280: if (pcbddc->nedcG) MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY);
1282: /* Free */
1283: if (fl2g) {
1284: ISDestroy(&primals);
1285: for (i = 0; i < nee; i++) ISDestroy(&eedges[i]);
1286: PetscFree(eedges);
1287: }
1289: /* hack mat_graph with primal dofs on the coarse edges */
1290: {
1291: PCBDDCGraph graph = pcbddc->mat_graph;
1292: PetscInt *oqueue = graph->queue;
1293: PetscInt *ocptr = graph->cptr;
1294: PetscInt ncc, *idxs;
1296: /* find first primal edge */
1297: if (pcbddc->nedclocal) {
1298: ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs);
1299: } else {
1300: if (fl2g) ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges);
1301: idxs = cedges;
1302: }
1303: cum = 0;
1304: while (cum < nee && cedges[cum] < 0) cum++;
1306: /* adapt connected components */
1307: PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue);
1308: graph->cptr[0] = 0;
1309: for (i = 0, ncc = 0; i < graph->ncc; i++) {
1310: PetscInt lc = ocptr[i + 1] - ocptr[i];
1311: if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1312: graph->cptr[ncc + 1] = graph->cptr[ncc] + 1;
1313: graph->queue[graph->cptr[ncc]] = cedges[cum];
1314: ncc++;
1315: lc--;
1316: cum++;
1317: while (cum < nee && cedges[cum] < 0) cum++;
1318: }
1319: graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1320: for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1321: ncc++;
1322: }
1323: graph->ncc = ncc;
1324: if (pcbddc->nedclocal) ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs);
1325: PetscFree2(ocptr, oqueue);
1326: }
1327: ISLocalToGlobalMappingDestroy(&fl2g);
1328: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals);
1329: PCBDDCGraphResetCSR(pcbddc->mat_graph);
1330: MatDestroy(&conn);
1332: ISDestroy(&nedfieldlocal);
1333: PetscFree(extrow);
1334: PetscFree2(work, rwork);
1335: PetscFree(corners);
1336: PetscFree(cedges);
1337: PetscFree(extrows);
1338: PetscFree(extcols);
1339: MatDestroy(&lG);
1341: /* Complete assembling */
1342: MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY);
1343: if (pcbddc->nedcG) {
1344: MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY);
1345: #if 0
1346: PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1347: MatView(pcbddc->nedcG,NULL);
1348: #endif
1349: }
1351: /* set change of basis */
1352: PCBDDCSetChangeOfBasisMat(pc, T, singular);
1353: MatDestroy(&T);
1355: return 0;
1356: }
1358: /* the near-null space of BDDC carries information on quadrature weights,
1359: and these can be collinear -> so cheat with MatNullSpaceCreate
1360: and create a suitable set of basis vectors first */
1361: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1362: {
1363: PetscInt i;
1365: for (i = 0; i < nvecs; i++) {
1366: PetscInt first, last;
1368: VecGetOwnershipRange(quad_vecs[i], &first, &last);
1370: if (i >= first && i < last) {
1371: PetscScalar *data;
1372: VecGetArray(quad_vecs[i], &data);
1373: if (!has_const) {
1374: data[i - first] = 1.;
1375: } else {
1376: data[2 * i - first] = 1. / PetscSqrtReal(2.);
1377: data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1378: }
1379: VecRestoreArray(quad_vecs[i], &data);
1380: }
1381: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1382: }
1383: MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp);
1384: for (i = 0; i < nvecs; i++) { /* reset vectors */
1385: PetscInt first, last;
1386: VecLockReadPop(quad_vecs[i]);
1387: VecGetOwnershipRange(quad_vecs[i], &first, &last);
1388: if (i >= first && i < last) {
1389: PetscScalar *data;
1390: VecGetArray(quad_vecs[i], &data);
1391: if (!has_const) {
1392: data[i - first] = 0.;
1393: } else {
1394: data[2 * i - first] = 0.;
1395: data[2 * i - first + 1] = 0.;
1396: }
1397: VecRestoreArray(quad_vecs[i], &data);
1398: }
1399: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1400: VecLockReadPush(quad_vecs[i]);
1401: }
1402: return 0;
1403: }
1405: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1406: {
1407: Mat loc_divudotp;
1408: Vec p, v, vins, quad_vec, *quad_vecs;
1409: ISLocalToGlobalMapping map;
1410: PetscScalar *vals;
1411: const PetscScalar *array;
1412: PetscInt i, maxneighs = 0, maxsize, *gidxs;
1413: PetscInt n_neigh, *neigh, *n_shared, **shared;
1414: PetscMPIInt rank;
1416: ISLocalToGlobalMappingGetInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared);
1417: for (i = 0; i < n_neigh; i++) maxneighs = PetscMax(graph->count[shared[i][0]] + 1, maxneighs);
1418: MPIU_Allreduce(MPI_IN_PLACE, &maxneighs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
1419: if (!maxneighs) {
1420: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared);
1421: *nnsp = NULL;
1422: return 0;
1423: }
1424: maxsize = 0;
1425: for (i = 0; i < n_neigh; i++) maxsize = PetscMax(n_shared[i], maxsize);
1426: PetscMalloc2(maxsize, &gidxs, maxsize, &vals);
1427: /* create vectors to hold quadrature weights */
1428: MatCreateVecs(A, &quad_vec, NULL);
1429: if (!transpose) {
1430: MatISGetLocalToGlobalMapping(A, &map, NULL);
1431: } else {
1432: MatISGetLocalToGlobalMapping(A, NULL, &map);
1433: }
1434: VecDuplicateVecs(quad_vec, maxneighs, &quad_vecs);
1435: VecDestroy(&quad_vec);
1436: PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, maxneighs, quad_vecs, nnsp);
1437: for (i = 0; i < maxneighs; i++) VecLockReadPop(quad_vecs[i]);
1439: /* compute local quad vec */
1440: MatISGetLocalMat(divudotp, &loc_divudotp);
1441: if (!transpose) {
1442: MatCreateVecs(loc_divudotp, &v, &p);
1443: } else {
1444: MatCreateVecs(loc_divudotp, &p, &v);
1445: }
1446: VecSet(p, 1.);
1447: if (!transpose) {
1448: MatMultTranspose(loc_divudotp, p, v);
1449: } else {
1450: MatMult(loc_divudotp, p, v);
1451: }
1452: if (vl2l) {
1453: Mat lA;
1454: VecScatter sc;
1456: MatISGetLocalMat(A, &lA);
1457: MatCreateVecs(lA, &vins, NULL);
1458: VecScatterCreate(v, NULL, vins, vl2l, &sc);
1459: VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD);
1460: VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD);
1461: VecScatterDestroy(&sc);
1462: } else {
1463: vins = v;
1464: }
1465: VecGetArrayRead(vins, &array);
1466: VecDestroy(&p);
1468: /* insert in global quadrature vecs */
1469: MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank);
1470: for (i = 1; i < n_neigh; i++) {
1471: const PetscInt *idxs;
1472: PetscInt idx, nn, j;
1474: idxs = shared[i];
1475: nn = n_shared[i];
1476: for (j = 0; j < nn; j++) vals[j] = array[idxs[j]];
1477: PetscFindInt(rank, graph->count[idxs[0]], graph->neighbours_set[idxs[0]], &idx);
1478: idx = -(idx + 1);
1480: ISLocalToGlobalMappingApply(map, nn, idxs, gidxs);
1481: VecSetValues(quad_vecs[idx], nn, gidxs, vals, INSERT_VALUES);
1482: }
1483: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared);
1484: VecRestoreArrayRead(vins, &array);
1485: if (vl2l) VecDestroy(&vins);
1486: VecDestroy(&v);
1487: PetscFree2(gidxs, vals);
1489: /* assemble near null space */
1490: for (i = 0; i < maxneighs; i++) VecAssemblyBegin(quad_vecs[i]);
1491: for (i = 0; i < maxneighs; i++) {
1492: VecAssemblyEnd(quad_vecs[i]);
1493: VecViewFromOptions(quad_vecs[i], NULL, "-pc_bddc_quad_vecs_view");
1494: VecLockReadPush(quad_vecs[i]);
1495: }
1496: VecDestroyVecs(maxneighs, &quad_vecs);
1497: return 0;
1498: }
1500: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1501: {
1502: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1504: if (primalv) {
1505: if (pcbddc->user_primal_vertices_local) {
1506: IS list[2], newp;
1508: list[0] = primalv;
1509: list[1] = pcbddc->user_primal_vertices_local;
1510: ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp);
1511: ISSortRemoveDups(newp);
1512: ISDestroy(&list[1]);
1513: pcbddc->user_primal_vertices_local = newp;
1514: } else {
1515: PCBDDCSetPrimalVerticesLocalIS(pc, primalv);
1516: }
1517: }
1518: return 0;
1519: }
1521: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1522: {
1523: PetscInt f, *comp = (PetscInt *)ctx;
1525: for (f = 0; f < Nf; f++) out[f] = X[*comp];
1526: return 0;
1527: }
1529: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1530: {
1531: Vec local, global;
1532: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1533: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
1534: PetscBool monolithic = PETSC_FALSE;
1536: PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1537: PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL);
1538: PetscOptionsEnd();
1539: /* need to convert from global to local topology information and remove references to information in global ordering */
1540: MatCreateVecs(pc->pmat, &global, NULL);
1541: MatCreateVecs(matis->A, &local, NULL);
1542: VecBindToCPU(global, PETSC_TRUE);
1543: VecBindToCPU(local, PETSC_TRUE);
1544: if (monolithic) { /* just get block size to properly compute vertices */
1545: if (pcbddc->vertex_size == 1) MatGetBlockSize(pc->pmat, &pcbddc->vertex_size);
1546: goto boundary;
1547: }
1549: if (pcbddc->user_provided_isfordofs) {
1550: if (pcbddc->n_ISForDofs) {
1551: PetscInt i;
1553: PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal);
1554: for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1555: PetscInt bs;
1557: PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]);
1558: ISGetBlockSize(pcbddc->ISForDofs[i], &bs);
1559: ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs);
1560: ISDestroy(&pcbddc->ISForDofs[i]);
1561: }
1562: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1563: pcbddc->n_ISForDofs = 0;
1564: PetscFree(pcbddc->ISForDofs);
1565: }
1566: } else {
1567: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1568: DM dm;
1570: MatGetDM(pc->pmat, &dm);
1571: if (!dm) PCGetDM(pc, &dm);
1572: if (dm) {
1573: IS *fields;
1574: PetscInt nf, i;
1576: DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL);
1577: PetscMalloc1(nf, &pcbddc->ISForDofsLocal);
1578: for (i = 0; i < nf; i++) {
1579: PetscInt bs;
1581: PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]);
1582: ISGetBlockSize(fields[i], &bs);
1583: ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs);
1584: ISDestroy(&fields[i]);
1585: }
1586: PetscFree(fields);
1587: pcbddc->n_ISForDofsLocal = nf;
1588: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1589: PetscContainer c;
1591: PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c);
1592: if (c) {
1593: MatISLocalFields lf;
1594: PetscContainerGetPointer(c, (void **)&lf);
1595: PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf);
1596: } else { /* fallback, create the default fields if bs > 1 */
1597: PetscInt i, n = matis->A->rmap->n;
1598: MatGetBlockSize(pc->pmat, &i);
1599: if (i > 1) {
1600: pcbddc->n_ISForDofsLocal = i;
1601: PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal);
1602: for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]);
1603: }
1604: }
1605: }
1606: } else {
1607: PetscInt i;
1608: for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]);
1609: }
1610: }
1612: boundary:
1613: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1614: PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal);
1615: } else if (pcbddc->DirichletBoundariesLocal) {
1616: PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal);
1617: }
1618: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1619: PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal);
1620: } else if (pcbddc->NeumannBoundariesLocal) {
1621: PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal);
1622: }
1623: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local);
1624: VecDestroy(&global);
1625: VecDestroy(&local);
1626: /* detect local disconnected subdomains if requested (use matis->A) */
1627: if (pcbddc->detect_disconnected) {
1628: IS primalv = NULL;
1629: PetscInt i;
1630: PetscBool filter = pcbddc->detect_disconnected_filter;
1632: for (i = 0; i < pcbddc->n_local_subs; i++) ISDestroy(&pcbddc->local_subs[i]);
1633: PetscFree(pcbddc->local_subs);
1634: PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv);
1635: PCBDDCAddPrimalVerticesLocalIS(pc, primalv);
1636: ISDestroy(&primalv);
1637: }
1638: /* early stage corner detection */
1639: {
1640: DM dm;
1642: MatGetDM(pc->pmat, &dm);
1643: if (!dm) PCGetDM(pc, &dm);
1644: if (dm) {
1645: PetscBool isda;
1647: PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda);
1648: if (isda) {
1649: ISLocalToGlobalMapping l2l;
1650: IS corners;
1651: Mat lA;
1652: PetscBool gl, lo;
1654: {
1655: Vec cvec;
1656: const PetscScalar *coords;
1657: PetscInt dof, n, cdim;
1658: PetscBool memc = PETSC_TRUE;
1660: DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL);
1661: DMGetCoordinates(dm, &cvec);
1662: VecGetLocalSize(cvec, &n);
1663: VecGetBlockSize(cvec, &cdim);
1664: n /= cdim;
1665: PetscFree(pcbddc->mat_graph->coords);
1666: PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords);
1667: VecGetArrayRead(cvec, &coords);
1668: #if defined(PETSC_USE_COMPLEX)
1669: memc = PETSC_FALSE;
1670: #endif
1671: if (dof != 1) memc = PETSC_FALSE;
1672: if (memc) {
1673: PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof);
1674: } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1675: PetscReal *bcoords = pcbddc->mat_graph->coords;
1676: PetscInt i, b, d;
1678: for (i = 0; i < n; i++) {
1679: for (b = 0; b < dof; b++) {
1680: for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1681: }
1682: }
1683: }
1684: VecRestoreArrayRead(cvec, &coords);
1685: pcbddc->mat_graph->cdim = cdim;
1686: pcbddc->mat_graph->cnloc = dof * n;
1687: pcbddc->mat_graph->cloc = PETSC_FALSE;
1688: }
1689: DMDAGetSubdomainCornersIS(dm, &corners);
1690: MatISGetLocalMat(pc->pmat, &lA);
1691: MatGetLocalToGlobalMapping(lA, &l2l, NULL);
1692: MatISRestoreLocalMat(pc->pmat, &lA);
1693: lo = (PetscBool)(l2l && corners);
1694: MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc));
1695: if (gl) { /* From PETSc's DMDA */
1696: const PetscInt *idx;
1697: PetscInt dof, bs, *idxout, n;
1699: DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL);
1700: ISLocalToGlobalMappingGetBlockSize(l2l, &bs);
1701: ISGetLocalSize(corners, &n);
1702: ISGetIndices(corners, &idx);
1703: if (bs == dof) {
1704: PetscMalloc1(n, &idxout);
1705: ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout);
1706: } else { /* the original DMDA local-to-local map have been modified */
1707: PetscInt i, d;
1709: PetscMalloc1(dof * n, &idxout);
1710: for (i = 0; i < n; i++)
1711: for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1712: ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout);
1714: bs = 1;
1715: n *= dof;
1716: }
1717: ISRestoreIndices(corners, &idx);
1718: DMDARestoreSubdomainCornersIS(dm, &corners);
1719: ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners);
1720: PCBDDCAddPrimalVerticesLocalIS(pc, corners);
1721: ISDestroy(&corners);
1722: pcbddc->corner_selected = PETSC_TRUE;
1723: pcbddc->corner_selection = PETSC_TRUE;
1724: }
1725: if (corners) DMDARestoreSubdomainCornersIS(dm, &corners);
1726: }
1727: }
1728: }
1729: if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1730: DM dm;
1732: MatGetDM(pc->pmat, &dm);
1733: if (!dm) PCGetDM(pc, &dm);
1734: if (dm) { /* this can get very expensive, I need to find a faster alternative */
1735: Vec vcoords;
1736: PetscSection section;
1737: PetscReal *coords;
1738: PetscInt d, cdim, nl, nf, **ctxs;
1739: PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1740: /* debug coordinates */
1741: PetscViewer viewer;
1742: PetscBool flg;
1743: PetscViewerFormat format;
1744: const char *prefix;
1746: DMGetCoordinateDim(dm, &cdim);
1747: DMGetLocalSection(dm, §ion);
1748: PetscSectionGetNumFields(section, &nf);
1749: DMCreateGlobalVector(dm, &vcoords);
1750: VecGetLocalSize(vcoords, &nl);
1751: PetscMalloc1(nl * cdim, &coords);
1752: PetscMalloc2(nf, &funcs, nf, &ctxs);
1753: PetscMalloc1(nf, &ctxs[0]);
1754: for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1755: for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
1757: /* debug coordinates */
1758: PCGetOptionsPrefix(pc, &prefix);
1759: PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg);
1760: if (flg) PetscViewerPushFormat(viewer, format);
1761: for (d = 0; d < cdim; d++) {
1762: PetscInt i;
1763: const PetscScalar *v;
1764: char name[16];
1766: for (i = 0; i < nf; i++) ctxs[i][0] = d;
1767: PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d);
1768: PetscObjectSetName((PetscObject)vcoords, name);
1769: DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords);
1770: if (flg) VecView(vcoords, viewer);
1771: VecGetArrayRead(vcoords, &v);
1772: for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1773: VecRestoreArrayRead(vcoords, &v);
1774: }
1775: VecDestroy(&vcoords);
1776: PCSetCoordinates(pc, cdim, nl, coords);
1777: PetscFree(coords);
1778: PetscFree(ctxs[0]);
1779: PetscFree2(funcs, ctxs);
1780: if (flg) {
1781: PetscViewerPopFormat(viewer);
1782: PetscViewerDestroy(&viewer);
1783: }
1784: }
1785: }
1786: return 0;
1787: }
1789: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1790: {
1791: Mat_IS *matis = (Mat_IS *)(pc->pmat->data);
1792: IS nis;
1793: const PetscInt *idxs;
1794: PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd;
1797: if (mop == MPI_LAND) {
1798: /* init rootdata with true */
1799: for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
1800: } else {
1801: PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n);
1802: }
1803: PetscArrayzero(matis->sf_leafdata, n);
1804: ISGetLocalSize(*is, &nd);
1805: ISGetIndices(*is, &idxs);
1806: for (i = 0; i < nd; i++)
1807: if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
1808: ISRestoreIndices(*is, &idxs);
1809: PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop);
1810: PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop);
1811: PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
1812: PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
1813: if (mop == MPI_LAND) {
1814: PetscMalloc1(nd, &nidxs);
1815: } else {
1816: PetscMalloc1(n, &nidxs);
1817: }
1818: for (i = 0, nnd = 0; i < n; i++)
1819: if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
1820: ISCreateGeneral(PetscObjectComm((PetscObject)(*is)), nnd, nidxs, PETSC_OWN_POINTER, &nis);
1821: ISDestroy(is);
1822: *is = nis;
1823: return 0;
1824: }
1826: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
1827: {
1828: PC_IS *pcis = (PC_IS *)(pc->data);
1829: PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);
1831: if (!pcbddc->benign_have_null) return 0;
1832: if (pcbddc->ChangeOfBasisMatrix) {
1833: Vec swap;
1835: MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change);
1836: swap = pcbddc->work_change;
1837: pcbddc->work_change = r;
1838: r = swap;
1839: }
1840: VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD);
1841: VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD);
1842: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0);
1843: KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D);
1844: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0);
1845: KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D);
1846: VecSet(z, 0.);
1847: VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE);
1848: VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE);
1849: if (pcbddc->ChangeOfBasisMatrix) {
1850: pcbddc->work_change = r;
1851: VecCopy(z, pcbddc->work_change);
1852: MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z);
1853: }
1854: return 0;
1855: }
1857: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1858: {
1859: PCBDDCBenignMatMult_ctx ctx;
1860: PetscBool apply_right, apply_left, reset_x;
1862: MatShellGetContext(A, &ctx);
1863: if (transpose) {
1864: apply_right = ctx->apply_left;
1865: apply_left = ctx->apply_right;
1866: } else {
1867: apply_right = ctx->apply_right;
1868: apply_left = ctx->apply_left;
1869: }
1870: reset_x = PETSC_FALSE;
1871: if (apply_right) {
1872: const PetscScalar *ax;
1873: PetscInt nl, i;
1875: VecGetLocalSize(x, &nl);
1876: VecGetArrayRead(x, &ax);
1877: PetscArraycpy(ctx->work, ax, nl);
1878: VecRestoreArrayRead(x, &ax);
1879: for (i = 0; i < ctx->benign_n; i++) {
1880: PetscScalar sum, val;
1881: const PetscInt *idxs;
1882: PetscInt nz, j;
1883: ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz);
1884: ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs);
1885: sum = 0.;
1886: if (ctx->apply_p0) {
1887: val = ctx->work[idxs[nz - 1]];
1888: for (j = 0; j < nz - 1; j++) {
1889: sum += ctx->work[idxs[j]];
1890: ctx->work[idxs[j]] += val;
1891: }
1892: } else {
1893: for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
1894: }
1895: ctx->work[idxs[nz - 1]] -= sum;
1896: ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs);
1897: }
1898: VecPlaceArray(x, ctx->work);
1899: reset_x = PETSC_TRUE;
1900: }
1901: if (transpose) {
1902: MatMultTranspose(ctx->A, x, y);
1903: } else {
1904: MatMult(ctx->A, x, y);
1905: }
1906: if (reset_x) VecResetArray(x);
1907: if (apply_left) {
1908: PetscScalar *ay;
1909: PetscInt i;
1911: VecGetArray(y, &ay);
1912: for (i = 0; i < ctx->benign_n; i++) {
1913: PetscScalar sum, val;
1914: const PetscInt *idxs;
1915: PetscInt nz, j;
1916: ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz);
1917: ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs);
1918: val = -ay[idxs[nz - 1]];
1919: if (ctx->apply_p0) {
1920: sum = 0.;
1921: for (j = 0; j < nz - 1; j++) {
1922: sum += ay[idxs[j]];
1923: ay[idxs[j]] += val;
1924: }
1925: ay[idxs[nz - 1]] += sum;
1926: } else {
1927: for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
1928: ay[idxs[nz - 1]] = 0.;
1929: }
1930: ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs);
1931: }
1932: VecRestoreArray(y, &ay);
1933: }
1934: return 0;
1935: }
1937: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
1938: {
1939: PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE);
1940: return 0;
1941: }
1943: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
1944: {
1945: PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE);
1946: return 0;
1947: }
1949: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
1950: {
1951: PC_IS *pcis = (PC_IS *)pc->data;
1952: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1953: PCBDDCBenignMatMult_ctx ctx;
1955: if (!restore) {
1956: Mat A_IB, A_BI;
1957: PetscScalar *work;
1958: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
1961: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return 0;
1962: PetscMalloc1(pcis->n, &work);
1963: MatCreate(PETSC_COMM_SELF, &A_IB);
1964: MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE);
1965: MatSetType(A_IB, MATSHELL);
1966: MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private);
1967: MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private);
1968: PetscNew(&ctx);
1969: MatShellSetContext(A_IB, ctx);
1970: ctx->apply_left = PETSC_TRUE;
1971: ctx->apply_right = PETSC_FALSE;
1972: ctx->apply_p0 = PETSC_FALSE;
1973: ctx->benign_n = pcbddc->benign_n;
1974: if (reuse) {
1975: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
1976: ctx->free = PETSC_FALSE;
1977: } else { /* TODO: could be optimized for successive solves */
1978: ISLocalToGlobalMapping N_to_D;
1979: PetscInt i;
1981: ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D);
1982: PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs);
1983: for (i = 0; i < pcbddc->benign_n; i++) ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]);
1984: ISLocalToGlobalMappingDestroy(&N_to_D);
1985: ctx->free = PETSC_TRUE;
1986: }
1987: ctx->A = pcis->A_IB;
1988: ctx->work = work;
1989: MatSetUp(A_IB);
1990: MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY);
1991: MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY);
1992: pcis->A_IB = A_IB;
1994: /* A_BI as A_IB^T */
1995: MatCreateTranspose(A_IB, &A_BI);
1996: pcbddc->benign_original_mat = pcis->A_BI;
1997: pcis->A_BI = A_BI;
1998: } else {
1999: if (!pcbddc->benign_original_mat) return 0;
2000: MatShellGetContext(pcis->A_IB, &ctx);
2001: MatDestroy(&pcis->A_IB);
2002: pcis->A_IB = ctx->A;
2003: ctx->A = NULL;
2004: MatDestroy(&pcis->A_BI);
2005: pcis->A_BI = pcbddc->benign_original_mat;
2006: pcbddc->benign_original_mat = NULL;
2007: if (ctx->free) {
2008: PetscInt i;
2009: for (i = 0; i < ctx->benign_n; i++) ISDestroy(&ctx->benign_zerodiag_subs[i]);
2010: PetscFree(ctx->benign_zerodiag_subs);
2011: }
2012: PetscFree(ctx->work);
2013: PetscFree(ctx);
2014: }
2015: return 0;
2016: }
2018: /* used just in bddc debug mode */
2019: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2020: {
2021: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2022: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
2023: Mat An;
2025: MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An);
2026: MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL);
2027: if (is1) {
2028: MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B);
2029: MatDestroy(&An);
2030: } else {
2031: *B = An;
2032: }
2033: return 0;
2034: }
2036: /* TODO: add reuse flag */
2037: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2038: {
2039: Mat Bt;
2040: PetscScalar *a, *bdata;
2041: const PetscInt *ii, *ij;
2042: PetscInt m, n, i, nnz, *bii, *bij;
2043: PetscBool flg_row;
2045: MatGetSize(A, &n, &m);
2046: MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row);
2047: MatSeqAIJGetArray(A, &a);
2048: nnz = n;
2049: for (i = 0; i < ii[n]; i++) {
2050: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2051: }
2052: PetscMalloc1(n + 1, &bii);
2053: PetscMalloc1(nnz, &bij);
2054: PetscMalloc1(nnz, &bdata);
2055: nnz = 0;
2056: bii[0] = 0;
2057: for (i = 0; i < n; i++) {
2058: PetscInt j;
2059: for (j = ii[i]; j < ii[i + 1]; j++) {
2060: PetscScalar entry = a[j];
2061: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2062: bij[nnz] = ij[j];
2063: bdata[nnz] = entry;
2064: nnz++;
2065: }
2066: }
2067: bii[i + 1] = nnz;
2068: }
2069: MatSeqAIJRestoreArray(A, &a);
2070: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt);
2071: MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row);
2072: {
2073: Mat_SeqAIJ *b = (Mat_SeqAIJ *)(Bt->data);
2074: b->free_a = PETSC_TRUE;
2075: b->free_ij = PETSC_TRUE;
2076: }
2077: if (*B == A) MatDestroy(&A);
2078: *B = Bt;
2079: return 0;
2080: }
2082: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2083: {
2084: Mat B = NULL;
2085: DM dm;
2086: IS is_dummy, *cc_n;
2087: ISLocalToGlobalMapping l2gmap_dummy;
2088: PCBDDCGraph graph;
2089: PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL;
2090: PetscInt i, n;
2091: PetscInt *xadj, *adjncy;
2092: PetscBool isplex = PETSC_FALSE;
2094: if (ncc) *ncc = 0;
2095: if (cc) *cc = NULL;
2096: if (primalv) *primalv = NULL;
2097: PCBDDCGraphCreate(&graph);
2098: MatGetDM(pc->pmat, &dm);
2099: if (!dm) PCGetDM(pc, &dm);
2100: if (dm) PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex);
2101: if (filter) isplex = PETSC_FALSE;
2103: if (isplex) { /* this code has been modified from plexpartition.c */
2104: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2105: PetscInt *adj = NULL;
2106: IS cellNumbering;
2107: const PetscInt *cellNum;
2108: PetscBool useCone, useClosure;
2109: PetscSection section;
2110: PetscSegBuffer adjBuffer;
2111: PetscSF sfPoint;
2113: DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2114: DMGetPointSF(dm, &sfPoint);
2115: PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2116: /* Build adjacency graph via a section/segbuffer */
2117: PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion);
2118: PetscSectionSetChart(section, pStart, pEnd);
2119: PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer);
2120: /* Always use FVM adjacency to create partitioner graph */
2121: DMGetBasicAdjacency(dm, &useCone, &useClosure);
2122: DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2123: DMPlexGetCellNumbering(dm, &cellNumbering);
2124: ISGetIndices(cellNumbering, &cellNum);
2125: for (n = 0, p = pStart; p < pEnd; p++) {
2126: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2127: if (nroots > 0) {
2128: if (cellNum[p] < 0) continue;
2129: }
2130: adjSize = PETSC_DETERMINE;
2131: DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2132: for (a = 0; a < adjSize; ++a) {
2133: const PetscInt point = adj[a];
2134: if (pStart <= point && point < pEnd) {
2135: PetscInt *PETSC_RESTRICT pBuf;
2136: PetscSectionAddDof(section, p, 1);
2137: PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2138: *pBuf = point;
2139: }
2140: }
2141: n++;
2142: }
2143: DMSetBasicAdjacency(dm, useCone, useClosure);
2144: /* Derive CSR graph from section/segbuffer */
2145: PetscSectionSetUp(section);
2146: PetscSectionGetStorageSize(section, &size);
2147: PetscMalloc1(n + 1, &xadj);
2148: for (idx = 0, p = pStart; p < pEnd; p++) {
2149: if (nroots > 0) {
2150: if (cellNum[p] < 0) continue;
2151: }
2152: PetscSectionGetOffset(section, p, &(xadj[idx++]));
2153: }
2154: xadj[n] = size;
2155: PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2156: /* Clean up */
2157: PetscSegBufferDestroy(&adjBuffer);
2158: PetscSectionDestroy(§ion);
2159: PetscFree(adj);
2160: graph->xadj = xadj;
2161: graph->adjncy = adjncy;
2162: } else {
2163: Mat A;
2164: PetscBool isseqaij, flg_row;
2166: MatISGetLocalMat(pc->pmat, &A);
2167: if (!A->rmap->N || !A->cmap->N) {
2168: PCBDDCGraphDestroy(&graph);
2169: return 0;
2170: }
2171: PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij);
2172: if (!isseqaij && filter) {
2173: PetscBool isseqdense;
2175: PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense);
2176: if (!isseqdense) {
2177: MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B);
2178: } else { /* TODO: rectangular case and LDA */
2179: PetscScalar *array;
2180: PetscReal chop = 1.e-6;
2182: MatDuplicate(A, MAT_COPY_VALUES, &B);
2183: MatDenseGetArray(B, &array);
2184: MatGetSize(B, &n, NULL);
2185: for (i = 0; i < n; i++) {
2186: PetscInt j;
2187: for (j = i + 1; j < n; j++) {
2188: PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2189: if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2190: if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2191: }
2192: }
2193: MatDenseRestoreArray(B, &array);
2194: MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B);
2195: }
2196: } else {
2197: PetscObjectReference((PetscObject)A);
2198: B = A;
2199: }
2200: MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row);
2202: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2203: if (filter) {
2204: PetscScalar *data;
2205: PetscInt j, cum;
2207: PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered);
2208: MatSeqAIJGetArray(B, &data);
2209: cum = 0;
2210: for (i = 0; i < n; i++) {
2211: PetscInt t;
2213: for (j = xadj[i]; j < xadj[i + 1]; j++) {
2214: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2215: adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2216: }
2217: t = xadj_filtered[i];
2218: xadj_filtered[i] = cum;
2219: cum += t;
2220: }
2221: MatSeqAIJRestoreArray(B, &data);
2222: graph->xadj = xadj_filtered;
2223: graph->adjncy = adjncy_filtered;
2224: } else {
2225: graph->xadj = xadj;
2226: graph->adjncy = adjncy;
2227: }
2228: }
2229: /* compute local connected components using PCBDDCGraph */
2230: ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy);
2231: ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy);
2232: ISDestroy(&is_dummy);
2233: PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT);
2234: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2235: PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL);
2236: PCBDDCGraphComputeConnectedComponents(graph);
2238: /* partial clean up */
2239: PetscFree2(xadj_filtered, adjncy_filtered);
2240: if (B) {
2241: PetscBool flg_row;
2242: MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row);
2243: MatDestroy(&B);
2244: }
2245: if (isplex) {
2246: PetscFree(xadj);
2247: PetscFree(adjncy);
2248: }
2250: /* get back data */
2251: if (isplex) {
2252: if (ncc) *ncc = graph->ncc;
2253: if (cc || primalv) {
2254: Mat A;
2255: PetscBT btv, btvt;
2256: PetscSection subSection;
2257: PetscInt *ids, cum, cump, *cids, *pids;
2259: DMPlexGetSubdomainSection(dm, &subSection);
2260: MatISGetLocalMat(pc->pmat, &A);
2261: PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids);
2262: PetscBTCreate(A->rmap->n, &btv);
2263: PetscBTCreate(A->rmap->n, &btvt);
2265: cids[0] = 0;
2266: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2267: PetscInt j;
2269: PetscBTMemzero(A->rmap->n, btvt);
2270: for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2271: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2273: DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure);
2274: for (k = 0; k < 2 * size; k += 2) {
2275: PetscInt s, pp, p = closure[k], off, dof, cdof;
2277: PetscSectionGetConstraintDof(subSection, p, &cdof);
2278: PetscSectionGetOffset(subSection, p, &off);
2279: PetscSectionGetDof(subSection, p, &dof);
2280: for (s = 0; s < dof - cdof; s++) {
2281: if (PetscBTLookupSet(btvt, off + s)) continue;
2282: if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2283: else pids[cump++] = off + s; /* cross-vertex */
2284: }
2285: DMPlexGetTreeParent(dm, p, &pp, NULL);
2286: if (pp != p) {
2287: PetscSectionGetConstraintDof(subSection, pp, &cdof);
2288: PetscSectionGetOffset(subSection, pp, &off);
2289: PetscSectionGetDof(subSection, pp, &dof);
2290: for (s = 0; s < dof - cdof; s++) {
2291: if (PetscBTLookupSet(btvt, off + s)) continue;
2292: if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2293: else pids[cump++] = off + s; /* cross-vertex */
2294: }
2295: }
2296: }
2297: DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure);
2298: }
2299: cids[i + 1] = cum;
2300: /* mark dofs as already assigned */
2301: for (j = cids[i]; j < cids[i + 1]; j++) PetscBTSet(btv, ids[j]);
2302: }
2303: if (cc) {
2304: PetscMalloc1(graph->ncc, &cc_n);
2305: for (i = 0; i < graph->ncc; i++) ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]);
2306: *cc = cc_n;
2307: }
2308: if (primalv) ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv);
2309: PetscFree3(ids, cids, pids);
2310: PetscBTDestroy(&btv);
2311: PetscBTDestroy(&btvt);
2312: }
2313: } else {
2314: if (ncc) *ncc = graph->ncc;
2315: if (cc) {
2316: PetscMalloc1(graph->ncc, &cc_n);
2317: for (i = 0; i < graph->ncc; i++) ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]);
2318: *cc = cc_n;
2319: }
2320: }
2321: /* clean up graph */
2322: graph->xadj = NULL;
2323: graph->adjncy = NULL;
2324: PCBDDCGraphDestroy(&graph);
2325: return 0;
2326: }
2328: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2329: {
2330: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2331: PC_IS *pcis = (PC_IS *)(pc->data);
2332: IS dirIS = NULL;
2333: PetscInt i;
2335: PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS);
2336: if (zerodiag) {
2337: Mat A;
2338: Vec vec3_N;
2339: PetscScalar *vals;
2340: const PetscInt *idxs;
2341: PetscInt nz, *count;
2343: /* p0 */
2344: VecSet(pcis->vec1_N, 0.);
2345: PetscMalloc1(pcis->n, &vals);
2346: ISGetLocalSize(zerodiag, &nz);
2347: ISGetIndices(zerodiag, &idxs);
2348: for (i = 0; i < nz; i++) vals[i] = 1.;
2349: VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES);
2350: VecAssemblyBegin(pcis->vec1_N);
2351: VecAssemblyEnd(pcis->vec1_N);
2352: /* v_I */
2353: VecSetRandom(pcis->vec2_N, NULL);
2354: for (i = 0; i < nz; i++) vals[i] = 0.;
2355: VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES);
2356: ISRestoreIndices(zerodiag, &idxs);
2357: ISGetIndices(pcis->is_B_local, &idxs);
2358: for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2359: VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES);
2360: ISRestoreIndices(pcis->is_B_local, &idxs);
2361: if (dirIS) {
2362: PetscInt n;
2364: ISGetLocalSize(dirIS, &n);
2365: ISGetIndices(dirIS, &idxs);
2366: for (i = 0; i < n; i++) vals[i] = 0.;
2367: VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES);
2368: ISRestoreIndices(dirIS, &idxs);
2369: }
2370: VecAssemblyBegin(pcis->vec2_N);
2371: VecAssemblyEnd(pcis->vec2_N);
2372: VecDuplicate(pcis->vec1_N, &vec3_N);
2373: VecSet(vec3_N, 0.);
2374: MatISGetLocalMat(pc->pmat, &A);
2375: MatMult(A, pcis->vec1_N, vec3_N);
2376: VecDot(vec3_N, pcis->vec2_N, &vals[0]);
2378: PetscFree(vals);
2379: VecDestroy(&vec3_N);
2381: /* there should not be any pressure dofs lying on the interface */
2382: PetscCalloc1(pcis->n, &count);
2383: ISGetIndices(pcis->is_B_local, &idxs);
2384: for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2385: ISRestoreIndices(pcis->is_B_local, &idxs);
2386: ISGetIndices(zerodiag, &idxs);
2388: ISRestoreIndices(zerodiag, &idxs);
2389: PetscFree(count);
2390: }
2391: ISDestroy(&dirIS);
2393: /* check PCBDDCBenignGetOrSetP0 */
2394: VecSetRandom(pcis->vec1_global, NULL);
2395: for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2396: PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE);
2397: for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2398: PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE);
2399: for (i = 0; i < pcbddc->benign_n; i++) {
2400: PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2402: }
2403: return 0;
2404: }
2406: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2407: {
2408: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2409: Mat_IS *matis = (Mat_IS *)(pc->pmat->data);
2410: IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2411: PetscInt nz, n, benign_n, bsp = 1;
2412: PetscInt *interior_dofs, n_interior_dofs, nneu;
2413: PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2415: if (reuse) goto project_b0;
2416: PetscSFDestroy(&pcbddc->benign_sf);
2417: MatDestroy(&pcbddc->benign_B0);
2418: for (n = 0; n < pcbddc->benign_n; n++) ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2419: PetscFree(pcbddc->benign_zerodiag_subs);
2420: has_null_pressures = PETSC_TRUE;
2421: have_null = PETSC_TRUE;
2422: /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2423: Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2424: Checks if all the pressure dofs in each subdomain have a zero diagonal
2425: If not, a change of basis on pressures is not needed
2426: since the local Schur complements are already SPD
2427: */
2428: if (pcbddc->n_ISForDofsLocal) {
2429: IS iP = NULL;
2430: PetscInt p, *pp;
2431: PetscBool flg;
2433: PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp);
2434: n = pcbddc->n_ISForDofsLocal;
2435: PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2436: PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg);
2437: PetscOptionsEnd();
2438: if (!flg) {
2439: n = 1;
2440: pp[0] = pcbddc->n_ISForDofsLocal - 1;
2441: }
2443: bsp = 0;
2444: for (p = 0; p < n; p++) {
2445: PetscInt bs;
2448: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs);
2449: bsp += bs;
2450: }
2451: PetscMalloc1(bsp, &bzerodiag);
2452: bsp = 0;
2453: for (p = 0; p < n; p++) {
2454: const PetscInt *idxs;
2455: PetscInt b, bs, npl, *bidxs;
2457: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs);
2458: ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl);
2459: ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs);
2460: PetscMalloc1(npl / bs, &bidxs);
2461: for (b = 0; b < bs; b++) {
2462: PetscInt i;
2464: for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2465: ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]);
2466: bsp++;
2467: }
2468: PetscFree(bidxs);
2469: ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs);
2470: }
2471: ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures);
2473: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2474: PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP);
2475: if (iP) {
2476: IS newpressures;
2478: ISDifference(pressures, iP, &newpressures);
2479: ISDestroy(&pressures);
2480: pressures = newpressures;
2481: }
2482: ISSorted(pressures, &sorted);
2483: if (!sorted) ISSort(pressures);
2484: PetscFree(pp);
2485: }
2487: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2488: MatGetLocalSize(pcbddc->local_mat, &n, NULL);
2489: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2490: MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag);
2491: ISSorted(zerodiag, &sorted);
2492: if (!sorted) ISSort(zerodiag);
2493: PetscObjectReference((PetscObject)zerodiag);
2494: zerodiag_save = zerodiag;
2495: ISGetLocalSize(zerodiag, &nz);
2496: if (!nz) {
2497: if (n) have_null = PETSC_FALSE;
2498: has_null_pressures = PETSC_FALSE;
2499: ISDestroy(&zerodiag);
2500: }
2501: recompute_zerodiag = PETSC_FALSE;
2503: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2504: zerodiag_subs = NULL;
2505: benign_n = 0;
2506: n_interior_dofs = 0;
2507: interior_dofs = NULL;
2508: nneu = 0;
2509: if (pcbddc->NeumannBoundariesLocal) ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu);
2510: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2511: if (checkb) { /* need to compute interior nodes */
2512: PetscInt n, i, j;
2513: PetscInt n_neigh, *neigh, *n_shared, **shared;
2514: PetscInt *iwork;
2516: ISLocalToGlobalMappingGetSize(matis->rmapping, &n);
2517: ISLocalToGlobalMappingGetInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared);
2518: PetscCalloc1(n, &iwork);
2519: PetscMalloc1(n, &interior_dofs);
2520: for (i = 1; i < n_neigh; i++)
2521: for (j = 0; j < n_shared[i]; j++) iwork[shared[i][j]] += 1;
2522: for (i = 0; i < n; i++)
2523: if (!iwork[i]) interior_dofs[n_interior_dofs++] = i;
2524: PetscFree(iwork);
2525: ISLocalToGlobalMappingRestoreInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared);
2526: }
2527: if (has_null_pressures) {
2528: IS *subs;
2529: PetscInt nsubs, i, j, nl;
2530: const PetscInt *idxs;
2531: PetscScalar *array;
2532: Vec *work;
2534: subs = pcbddc->local_subs;
2535: nsubs = pcbddc->n_local_subs;
2536: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2537: if (checkb) {
2538: VecDuplicateVecs(matis->y, 2, &work);
2539: ISGetLocalSize(zerodiag, &nl);
2540: ISGetIndices(zerodiag, &idxs);
2541: /* work[0] = 1_p */
2542: VecSet(work[0], 0.);
2543: VecGetArray(work[0], &array);
2544: for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2545: VecRestoreArray(work[0], &array);
2546: /* work[0] = 1_v */
2547: VecSet(work[1], 1.);
2548: VecGetArray(work[1], &array);
2549: for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2550: VecRestoreArray(work[1], &array);
2551: ISRestoreIndices(zerodiag, &idxs);
2552: }
2554: if (nsubs > 1 || bsp > 1) {
2555: IS *is;
2556: PetscInt b, totb;
2558: totb = bsp;
2559: is = bsp > 1 ? bzerodiag : &zerodiag;
2560: nsubs = PetscMax(nsubs, 1);
2561: PetscCalloc1(nsubs * totb, &zerodiag_subs);
2562: for (b = 0; b < totb; b++) {
2563: for (i = 0; i < nsubs; i++) {
2564: ISLocalToGlobalMapping l2g;
2565: IS t_zerodiag_subs;
2566: PetscInt nl;
2568: if (subs) {
2569: ISLocalToGlobalMappingCreateIS(subs[i], &l2g);
2570: } else {
2571: IS tis;
2573: MatGetLocalSize(pcbddc->local_mat, &nl, NULL);
2574: ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis);
2575: ISLocalToGlobalMappingCreateIS(tis, &l2g);
2576: ISDestroy(&tis);
2577: }
2578: ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs);
2579: ISGetLocalSize(t_zerodiag_subs, &nl);
2580: if (nl) {
2581: PetscBool valid = PETSC_TRUE;
2583: if (checkb) {
2584: VecSet(matis->x, 0);
2585: ISGetLocalSize(subs[i], &nl);
2586: ISGetIndices(subs[i], &idxs);
2587: VecGetArray(matis->x, &array);
2588: for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2589: VecRestoreArray(matis->x, &array);
2590: ISRestoreIndices(subs[i], &idxs);
2591: VecPointwiseMult(matis->x, work[0], matis->x);
2592: MatMult(matis->A, matis->x, matis->y);
2593: VecPointwiseMult(matis->y, work[1], matis->y);
2594: VecGetArray(matis->y, &array);
2595: for (j = 0; j < n_interior_dofs; j++) {
2596: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2597: valid = PETSC_FALSE;
2598: break;
2599: }
2600: }
2601: VecRestoreArray(matis->y, &array);
2602: }
2603: if (valid && nneu) {
2604: const PetscInt *idxs;
2605: PetscInt nzb;
2607: ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs);
2608: ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL);
2609: ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs);
2610: if (nzb) valid = PETSC_FALSE;
2611: }
2612: if (valid && pressures) {
2613: IS t_pressure_subs, tmp;
2614: PetscInt i1, i2;
2616: ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs);
2617: ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp);
2618: ISGetLocalSize(tmp, &i1);
2619: ISGetLocalSize(t_zerodiag_subs, &i2);
2620: if (i2 != i1) valid = PETSC_FALSE;
2621: ISDestroy(&t_pressure_subs);
2622: ISDestroy(&tmp);
2623: }
2624: if (valid) {
2625: ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]);
2626: benign_n++;
2627: } else recompute_zerodiag = PETSC_TRUE;
2628: }
2629: ISDestroy(&t_zerodiag_subs);
2630: ISLocalToGlobalMappingDestroy(&l2g);
2631: }
2632: }
2633: } else { /* there's just one subdomain (or zero if they have not been detected */
2634: PetscBool valid = PETSC_TRUE;
2636: if (nneu) valid = PETSC_FALSE;
2637: if (valid && pressures) ISEqual(pressures, zerodiag, &valid);
2638: if (valid && checkb) {
2639: MatMult(matis->A, work[0], matis->x);
2640: VecPointwiseMult(matis->x, work[1], matis->x);
2641: VecGetArray(matis->x, &array);
2642: for (j = 0; j < n_interior_dofs; j++) {
2643: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2644: valid = PETSC_FALSE;
2645: break;
2646: }
2647: }
2648: VecRestoreArray(matis->x, &array);
2649: }
2650: if (valid) {
2651: benign_n = 1;
2652: PetscMalloc1(benign_n, &zerodiag_subs);
2653: PetscObjectReference((PetscObject)zerodiag);
2654: zerodiag_subs[0] = zerodiag;
2655: }
2656: }
2657: if (checkb) VecDestroyVecs(2, &work);
2658: }
2659: PetscFree(interior_dofs);
2661: if (!benign_n) {
2662: PetscInt n;
2664: ISDestroy(&zerodiag);
2665: recompute_zerodiag = PETSC_FALSE;
2666: MatGetLocalSize(pcbddc->local_mat, &n, NULL);
2667: if (n) have_null = PETSC_FALSE;
2668: }
2670: /* final check for null pressures */
2671: if (zerodiag && pressures) ISEqual(pressures, zerodiag, &have_null);
2673: if (recompute_zerodiag) {
2674: ISDestroy(&zerodiag);
2675: if (benign_n == 1) {
2676: PetscObjectReference((PetscObject)zerodiag_subs[0]);
2677: zerodiag = zerodiag_subs[0];
2678: } else {
2679: PetscInt i, nzn, *new_idxs;
2681: nzn = 0;
2682: for (i = 0; i < benign_n; i++) {
2683: PetscInt ns;
2684: ISGetLocalSize(zerodiag_subs[i], &ns);
2685: nzn += ns;
2686: }
2687: PetscMalloc1(nzn, &new_idxs);
2688: nzn = 0;
2689: for (i = 0; i < benign_n; i++) {
2690: PetscInt ns, *idxs;
2691: ISGetLocalSize(zerodiag_subs[i], &ns);
2692: ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs);
2693: PetscArraycpy(new_idxs + nzn, idxs, ns);
2694: ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs);
2695: nzn += ns;
2696: }
2697: PetscSortInt(nzn, new_idxs);
2698: ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag);
2699: }
2700: have_null = PETSC_FALSE;
2701: }
2703: /* determines if the coarse solver will be singular or not */
2704: MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc));
2706: /* Prepare matrix to compute no-net-flux */
2707: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2708: Mat A, loc_divudotp;
2709: ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2710: IS row, col, isused = NULL;
2711: PetscInt M, N, n, st, n_isused;
2713: if (pressures) {
2714: isused = pressures;
2715: } else {
2716: isused = zerodiag_save;
2717: }
2718: MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL);
2719: MatISGetLocalMat(pc->pmat, &A);
2720: MatGetLocalSize(A, &n, NULL);
2722: n_isused = 0;
2723: if (isused) ISGetLocalSize(isused, &n_isused);
2724: MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));
2725: st = st - n_isused;
2726: if (n) {
2727: const PetscInt *gidxs;
2729: MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp);
2730: ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs);
2731: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2732: ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row);
2733: ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col);
2734: ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs);
2735: } else {
2736: MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp);
2737: ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row);
2738: ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col);
2739: }
2740: MatGetSize(pc->pmat, NULL, &N);
2741: ISGetSize(row, &M);
2742: ISLocalToGlobalMappingCreateIS(row, &rl2g);
2743: ISLocalToGlobalMappingCreateIS(col, &cl2g);
2744: ISDestroy(&row);
2745: ISDestroy(&col);
2746: MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp);
2747: MatSetType(pcbddc->divudotp, MATIS);
2748: MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N);
2749: MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g);
2750: ISLocalToGlobalMappingDestroy(&rl2g);
2751: ISLocalToGlobalMappingDestroy(&cl2g);
2752: MatISSetLocalMat(pcbddc->divudotp, loc_divudotp);
2753: MatDestroy(&loc_divudotp);
2754: MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY);
2755: MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY);
2756: }
2757: ISDestroy(&zerodiag_save);
2758: ISDestroy(&pressures);
2759: if (bzerodiag) {
2760: PetscInt i;
2762: for (i = 0; i < bsp; i++) ISDestroy(&bzerodiag[i]);
2763: PetscFree(bzerodiag);
2764: }
2765: pcbddc->benign_n = benign_n;
2766: pcbddc->benign_zerodiag_subs = zerodiag_subs;
2768: /* determines if the problem has subdomains with 0 pressure block */
2769: have_null = (PetscBool)(!!pcbddc->benign_n);
2770: MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
2772: project_b0:
2773: MatGetLocalSize(pcbddc->local_mat, &n, NULL);
2774: /* change of basis and p0 dofs */
2775: if (pcbddc->benign_n) {
2776: PetscInt i, s, *nnz;
2778: /* local change of basis for pressures */
2779: MatDestroy(&pcbddc->benign_change);
2780: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change);
2781: MatSetType(pcbddc->benign_change, MATAIJ);
2782: MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE);
2783: PetscMalloc1(n, &nnz);
2784: for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
2785: for (i = 0; i < pcbddc->benign_n; i++) {
2786: const PetscInt *idxs;
2787: PetscInt nzs, j;
2789: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs);
2790: ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs);
2791: for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
2792: nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */
2793: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs);
2794: }
2795: MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz);
2796: MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE);
2797: PetscFree(nnz);
2798: /* set identity by default */
2799: for (i = 0; i < n; i++) MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES);
2800: PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0);
2801: PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0);
2802: /* set change on pressures */
2803: for (s = 0; s < pcbddc->benign_n; s++) {
2804: PetscScalar *array;
2805: const PetscInt *idxs;
2806: PetscInt nzs;
2808: ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs);
2809: ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs);
2810: for (i = 0; i < nzs - 1; i++) {
2811: PetscScalar vals[2];
2812: PetscInt cols[2];
2814: cols[0] = idxs[i];
2815: cols[1] = idxs[nzs - 1];
2816: vals[0] = 1.;
2817: vals[1] = 1.;
2818: MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES);
2819: }
2820: PetscMalloc1(nzs, &array);
2821: for (i = 0; i < nzs - 1; i++) array[i] = -1.;
2822: array[nzs - 1] = 1.;
2823: MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES);
2824: /* store local idxs for p0 */
2825: pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
2826: ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs);
2827: PetscFree(array);
2828: }
2829: MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY);
2830: MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY);
2832: /* project if needed */
2833: if (pcbddc->benign_change_explicit) {
2834: Mat M;
2836: MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M);
2837: MatDestroy(&pcbddc->local_mat);
2838: MatSeqAIJCompress(M, &pcbddc->local_mat);
2839: MatDestroy(&M);
2840: }
2841: /* store global idxs for p0 */
2842: ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx);
2843: }
2844: *zerodiaglocal = zerodiag;
2845: return 0;
2846: }
2848: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2849: {
2850: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2851: PetscScalar *array;
2853: if (!pcbddc->benign_sf) {
2854: PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf);
2855: PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx);
2856: }
2857: if (get) {
2858: VecGetArrayRead(v, (const PetscScalar **)&array);
2859: PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE);
2860: PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE);
2861: VecRestoreArrayRead(v, (const PetscScalar **)&array);
2862: } else {
2863: VecGetArray(v, &array);
2864: PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE);
2865: PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE);
2866: VecRestoreArray(v, &array);
2867: }
2868: return 0;
2869: }
2871: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2872: {
2873: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2875: /* TODO: add error checking
2876: - avoid nested pop (or push) calls.
2877: - cannot push before pop.
2878: - cannot call this if pcbddc->local_mat is NULL
2879: */
2880: if (!pcbddc->benign_n) return 0;
2881: if (pop) {
2882: if (pcbddc->benign_change_explicit) {
2883: IS is_p0;
2884: MatReuse reuse;
2886: /* extract B_0 */
2887: reuse = MAT_INITIAL_MATRIX;
2888: if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
2889: ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0);
2890: MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0);
2891: /* remove rows and cols from local problem */
2892: MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE);
2893: MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE);
2894: MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL);
2895: ISDestroy(&is_p0);
2896: } else {
2897: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
2898: PetscScalar *vals;
2899: PetscInt i, n, *idxs_ins;
2901: VecGetLocalSize(matis->y, &n);
2902: PetscMalloc2(n, &idxs_ins, n, &vals);
2903: if (!pcbddc->benign_B0) {
2904: PetscInt *nnz;
2905: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0);
2906: MatSetType(pcbddc->benign_B0, MATAIJ);
2907: MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE);
2908: PetscMalloc1(pcbddc->benign_n, &nnz);
2909: for (i = 0; i < pcbddc->benign_n; i++) {
2910: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]);
2911: nnz[i] = n - nnz[i];
2912: }
2913: MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz);
2914: MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE);
2915: PetscFree(nnz);
2916: }
2918: for (i = 0; i < pcbddc->benign_n; i++) {
2919: PetscScalar *array;
2920: PetscInt *idxs, j, nz, cum;
2922: VecSet(matis->x, 0.);
2923: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz);
2924: ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs);
2925: for (j = 0; j < nz; j++) vals[j] = 1.;
2926: VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES);
2927: VecAssemblyBegin(matis->x);
2928: VecAssemblyEnd(matis->x);
2929: VecSet(matis->y, 0.);
2930: MatMult(matis->A, matis->x, matis->y);
2931: VecGetArray(matis->y, &array);
2932: cum = 0;
2933: for (j = 0; j < n; j++) {
2934: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
2935: vals[cum] = array[j];
2936: idxs_ins[cum] = j;
2937: cum++;
2938: }
2939: }
2940: MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES);
2941: VecRestoreArray(matis->y, &array);
2942: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs);
2943: }
2944: MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY);
2945: MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY);
2946: PetscFree2(idxs_ins, vals);
2947: }
2948: } else { /* push */
2949: if (pcbddc->benign_change_explicit) {
2950: PetscInt i;
2952: for (i = 0; i < pcbddc->benign_n; i++) {
2953: PetscScalar *B0_vals;
2954: PetscInt *B0_cols, B0_ncol;
2956: MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals);
2957: MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES);
2958: MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES);
2959: MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES);
2960: MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals);
2961: }
2962: MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY);
2963: MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY);
2964: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
2965: }
2966: return 0;
2967: }
2969: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
2970: {
2971: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2972: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2973: PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork;
2974: PetscBLASInt *B_iwork, *B_ifail;
2975: PetscScalar *work, lwork;
2976: PetscScalar *St, *S, *eigv;
2977: PetscScalar *Sarray, *Starray;
2978: PetscReal *eigs, thresh, lthresh, uthresh;
2979: PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
2980: PetscBool allocated_S_St, upart;
2981: #if defined(PETSC_USE_COMPLEX)
2982: PetscReal *rwork;
2983: #endif
2985: if (!pcbddc->adaptive_selection) return 0;
2989: sub_schurs->is_posdef);
2990: PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0);
2992: if (pcbddc->dbg_flag) {
2993: if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
2994: PetscViewerFlush(pcbddc->dbg_viewer);
2995: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
2996: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n");
2997: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
2998: }
3000: if (pcbddc->dbg_flag) PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef);
3002: /* max size of subsets */
3003: mss = 0;
3004: for (i = 0; i < sub_schurs->n_subs; i++) {
3005: PetscInt subset_size;
3007: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
3008: mss = PetscMax(mss, subset_size);
3009: }
3011: /* min/max and threshold */
3012: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3013: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3014: nmax = PetscMax(nmin, nmax);
3015: allocated_S_St = PETSC_FALSE;
3016: if (nmin || !sub_schurs->is_posdef) { /* XXX */
3017: allocated_S_St = PETSC_TRUE;
3018: }
3020: /* allocate lapack workspace */
3021: cum = cum2 = 0;
3022: maxneigs = 0;
3023: for (i = 0; i < sub_schurs->n_subs; i++) {
3024: PetscInt n, subset_size;
3026: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
3027: n = PetscMin(subset_size, nmax);
3028: cum += subset_size;
3029: cum2 += subset_size * n;
3030: maxneigs = PetscMax(maxneigs, n);
3031: }
3032: lwork = 0;
3033: if (mss) {
3034: if (sub_schurs->is_symmetric) {
3035: PetscScalar sdummy = 0.;
3036: PetscBLASInt B_itype = 1;
3037: PetscBLASInt B_N = mss, idummy = 0;
3038: PetscReal rdummy = 0., zero = 0.0;
3039: PetscReal eps = 0.0; /* dlamch? */
3041: B_lwork = -1;
3042: /* some implementations may complain about NULL pointers, even if we are querying */
3043: S = &sdummy;
3044: St = &sdummy;
3045: eigs = &rdummy;
3046: eigv = &sdummy;
3047: B_iwork = &idummy;
3048: B_ifail = &idummy;
3049: #if defined(PETSC_USE_COMPLEX)
3050: rwork = &rdummy;
3051: #endif
3052: thresh = 1.0;
3053: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3054: #if defined(PETSC_USE_COMPLEX)
3055: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3056: #else
3057: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3058: #endif
3060: PetscFPTrapPop();
3061: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3062: }
3064: nv = 0;
3065: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3066: ISGetLocalSize(sub_schurs->is_vertices, &nv);
3067: }
3068: PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork);
3069: if (allocated_S_St) PetscMalloc2(mss * mss, &S, mss * mss, &St);
3070: PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail);
3071: #if defined(PETSC_USE_COMPLEX)
3072: PetscMalloc1(7 * mss, &rwork);
3073: #endif
3074: PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3075: &pcbddc->adaptive_constraints_data));
3076: PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs);
3078: maxneigs = 0;
3079: cum = cumarray = 0;
3080: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3081: pcbddc->adaptive_constraints_data_ptr[0] = 0;
3082: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3083: const PetscInt *idxs;
3085: ISGetIndices(sub_schurs->is_vertices, &idxs);
3086: for (cum = 0; cum < nv; cum++) {
3087: pcbddc->adaptive_constraints_n[cum] = 1;
3088: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3089: pcbddc->adaptive_constraints_data[cum] = 1.0;
3090: pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3091: pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3092: }
3093: ISRestoreIndices(sub_schurs->is_vertices, &idxs);
3094: }
3096: if (mss) { /* multilevel */
3097: if (sub_schurs->gdsw) {
3098: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray);
3099: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray);
3100: } else {
3101: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray);
3102: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray);
3103: }
3104: }
3106: lthresh = pcbddc->adaptive_threshold[0];
3107: uthresh = pcbddc->adaptive_threshold[1];
3108: upart = pcbddc->use_deluxe_scaling;
3109: for (i = 0; i < sub_schurs->n_subs; i++) {
3110: const PetscInt *idxs;
3111: PetscReal upper, lower;
3112: PetscInt j, subset_size, eigs_start = 0;
3113: PetscBLASInt B_N;
3114: PetscBool same_data = PETSC_FALSE;
3115: PetscBool scal = PETSC_FALSE;
3117: if (upart) {
3118: upper = PETSC_MAX_REAL;
3119: lower = uthresh;
3120: } else {
3121: if (sub_schurs->gdsw) {
3122: upper = uthresh;
3123: lower = PETSC_MIN_REAL;
3124: } else {
3126: upper = 1. / uthresh;
3127: lower = 0.;
3128: }
3129: }
3130: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
3131: ISGetIndices(sub_schurs->is_subs[i], &idxs);
3132: PetscBLASIntCast(subset_size, &B_N);
3133: /* this is experimental: we assume the dofs have been properly grouped to have
3134: the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3135: if (!sub_schurs->is_posdef) {
3136: Mat T;
3138: for (j = 0; j < subset_size; j++) {
3139: if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3140: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T);
3141: MatScale(T, -1.0);
3142: MatDestroy(&T);
3143: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T);
3144: MatScale(T, -1.0);
3145: MatDestroy(&T);
3146: if (sub_schurs->change_primal_sub) {
3147: PetscInt nz, k;
3148: const PetscInt *idxs;
3150: ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz);
3151: ISGetIndices(sub_schurs->change_primal_sub[i], &idxs);
3152: for (k = 0; k < nz; k++) {
3153: *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3154: *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3155: }
3156: ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs);
3157: }
3158: scal = PETSC_TRUE;
3159: break;
3160: }
3161: }
3162: }
3164: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3165: if (sub_schurs->is_symmetric) {
3166: PetscInt j, k;
3167: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3168: PetscArrayzero(S, subset_size * subset_size);
3169: PetscArrayzero(St, subset_size * subset_size);
3170: }
3171: for (j = 0; j < subset_size; j++) {
3172: for (k = j; k < subset_size; k++) {
3173: S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k];
3174: St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3175: }
3176: }
3177: } else {
3178: PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3179: PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3180: }
3181: } else {
3182: S = Sarray + cumarray;
3183: St = Starray + cumarray;
3184: }
3185: /* see if we can save some work */
3186: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscArraycmp(S, St, subset_size * subset_size, &same_data);
3188: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3189: B_neigs = 0;
3190: } else {
3191: if (sub_schurs->is_symmetric) {
3192: PetscBLASInt B_itype = 1;
3193: PetscBLASInt B_IL, B_IU;
3194: PetscReal eps = -1.0; /* dlamch? */
3195: PetscInt nmin_s;
3196: PetscBool compute_range;
3198: B_neigs = 0;
3199: compute_range = (PetscBool)!same_data;
3200: if (nmin >= subset_size) compute_range = PETSC_FALSE;
3202: if (pcbddc->dbg_flag) {
3203: PetscInt nc = 0;
3205: if (sub_schurs->change_primal_sub) ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc);
3206: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i,
3207: sub_schurs->n_subs, subset_size, pcbddc->mat_graph->count[idxs[0]] + 1, pcbddc->mat_graph->which_dof[idxs[0]], compute_range, nc));
3208: }
3210: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3211: if (compute_range) {
3212: /* ask for eigenvalues larger than thresh */
3213: if (sub_schurs->is_posdef) {
3214: #if defined(PETSC_USE_COMPLEX)
3215: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3216: #else
3217: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3218: #endif
3219: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3220: } else { /* no theory so far, but it works nicely */
3221: PetscInt recipe = 0, recipe_m = 1;
3222: PetscReal bb[2];
3224: PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL);
3225: switch (recipe) {
3226: case 0:
3227: if (scal) {
3228: bb[0] = PETSC_MIN_REAL;
3229: bb[1] = lthresh;
3230: } else {
3231: bb[0] = uthresh;
3232: bb[1] = PETSC_MAX_REAL;
3233: }
3234: #if defined(PETSC_USE_COMPLEX)
3235: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3236: #else
3237: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3238: #endif
3239: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3240: break;
3241: case 1:
3242: bb[0] = PETSC_MIN_REAL;
3243: bb[1] = lthresh * lthresh;
3244: #if defined(PETSC_USE_COMPLEX)
3245: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3246: #else
3247: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3248: #endif
3249: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3250: if (!scal) {
3251: PetscBLASInt B_neigs2 = 0;
3253: bb[0] = PetscMax(lthresh * lthresh, uthresh);
3254: bb[1] = PETSC_MAX_REAL;
3255: PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3256: PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3257: #if defined(PETSC_USE_COMPLEX)
3258: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3259: #else
3260: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3261: #endif
3262: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3263: B_neigs += B_neigs2;
3264: }
3265: break;
3266: case 2:
3267: if (scal) {
3268: bb[0] = PETSC_MIN_REAL;
3269: bb[1] = 0;
3270: #if defined(PETSC_USE_COMPLEX)
3271: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3272: #else
3273: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3274: #endif
3275: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3276: } else {
3277: PetscBLASInt B_neigs2 = 0;
3278: PetscBool import = PETSC_FALSE;
3280: lthresh = PetscMax(lthresh, 0.0);
3281: if (lthresh > 0.0) {
3282: bb[0] = PETSC_MIN_REAL;
3283: bb[1] = lthresh * lthresh;
3285: import = PETSC_TRUE;
3286: #if defined(PETSC_USE_COMPLEX)
3287: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3288: #else
3289: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3290: #endif
3291: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3292: }
3293: bb[0] = PetscMax(lthresh * lthresh, uthresh);
3294: bb[1] = PETSC_MAX_REAL;
3295: if (import) {
3296: PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3297: PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3298: }
3299: #if defined(PETSC_USE_COMPLEX)
3300: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3301: #else
3302: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3303: #endif
3304: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3305: B_neigs += B_neigs2;
3306: }
3307: break;
3308: case 3:
3309: if (scal) {
3310: PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL);
3311: } else {
3312: PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL);
3313: }
3314: if (!scal) {
3315: bb[0] = uthresh;
3316: bb[1] = PETSC_MAX_REAL;
3317: #if defined(PETSC_USE_COMPLEX)
3318: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3319: #else
3320: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3321: #endif
3322: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3323: }
3324: if (recipe_m > 0 && B_N - B_neigs > 0) {
3325: PetscBLASInt B_neigs2 = 0;
3327: B_IL = 1;
3328: PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU);
3329: PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3330: PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3331: #if defined(PETSC_USE_COMPLEX)
3332: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3333: #else
3334: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3335: #endif
3336: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3337: B_neigs += B_neigs2;
3338: }
3339: break;
3340: case 4:
3341: bb[0] = PETSC_MIN_REAL;
3342: bb[1] = lthresh;
3343: #if defined(PETSC_USE_COMPLEX)
3344: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3345: #else
3346: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3347: #endif
3348: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3349: {
3350: PetscBLASInt B_neigs2 = 0;
3352: bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3353: bb[1] = PETSC_MAX_REAL;
3354: PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3355: PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3356: #if defined(PETSC_USE_COMPLEX)
3357: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3358: #else
3359: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3360: #endif
3361: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3362: B_neigs += B_neigs2;
3363: }
3364: break;
3365: case 5: /* same as before: first compute all eigenvalues, then filter */
3366: #if defined(PETSC_USE_COMPLEX)
3367: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3368: #else
3369: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3370: #endif
3371: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3372: {
3373: PetscInt e, k, ne;
3374: for (e = 0, ne = 0; e < B_neigs; e++) {
3375: if (eigs[e] < lthresh || eigs[e] > uthresh) {
3376: for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3377: eigs[ne] = eigs[e];
3378: ne++;
3379: }
3380: }
3381: PetscArraycpy(eigv, S, B_N * ne);
3382: B_neigs = ne;
3383: }
3384: break;
3385: default:
3386: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3387: }
3388: }
3389: } else if (!same_data) { /* this is just to see all the eigenvalues */
3390: B_IU = PetscMax(1, PetscMin(B_N, nmax));
3391: B_IL = 1;
3392: #if defined(PETSC_USE_COMPLEX)
3393: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3394: #else
3395: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3396: #endif
3397: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3398: } else { /* same_data is true, so just get the adaptive functional requested by the user */
3399: PetscInt k;
3401: ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax);
3402: PetscBLASIntCast(nmax, &B_neigs);
3403: nmin = nmax;
3404: PetscArrayzero(eigv, subset_size * nmax);
3405: for (k = 0; k < nmax; k++) {
3406: eigs[k] = 1. / PETSC_SMALL;
3407: eigv[k * (subset_size + 1)] = 1.0;
3408: }
3409: }
3410: PetscFPTrapPop();
3411: if (B_ierr) {
3414: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3415: }
3417: if (B_neigs > nmax) {
3418: if (pcbddc->dbg_flag) PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax);
3419: if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3420: B_neigs = nmax;
3421: }
3423: nmin_s = PetscMin(nmin, B_N);
3424: if (B_neigs < nmin_s) {
3425: PetscBLASInt B_neigs2 = 0;
3427: if (upart) {
3428: if (scal) {
3429: B_IU = nmin_s;
3430: B_IL = B_neigs + 1;
3431: } else {
3432: B_IL = B_N - nmin_s + 1;
3433: B_IU = B_N - B_neigs;
3434: }
3435: } else {
3436: B_IL = B_neigs + 1;
3437: B_IU = nmin_s;
3438: }
3439: if (pcbddc->dbg_flag) {
3440: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU);
3441: }
3442: if (sub_schurs->is_symmetric) {
3443: PetscInt j, k;
3444: for (j = 0; j < subset_size; j++) {
3445: for (k = j; k < subset_size; k++) {
3446: S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k];
3447: St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3448: }
3449: }
3450: } else {
3451: PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3452: PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3453: }
3454: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3455: #if defined(PETSC_USE_COMPLEX)
3456: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3457: #else
3458: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3459: #endif
3460: PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3461: PetscFPTrapPop();
3462: B_neigs += B_neigs2;
3463: }
3464: if (B_ierr) {
3467: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3468: }
3469: if (pcbddc->dbg_flag) {
3470: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs);
3471: for (j = 0; j < B_neigs; j++) {
3472: if (!sub_schurs->gdsw) {
3473: if (eigs[j] == 0.0) {
3474: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n");
3475: } else {
3476: if (upart) {
3477: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start]);
3478: } else {
3479: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1. / eigs[j + eigs_start]));
3480: }
3481: }
3482: } else {
3483: double pg = (double)eigs[j + eigs_start];
3484: if (pg < 2 * PETSC_SMALL) pg = 0.0;
3485: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg);
3486: }
3487: }
3488: }
3489: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3490: }
3491: /* change the basis back to the original one */
3492: if (sub_schurs->change) {
3493: Mat change, phi, phit;
3495: if (pcbddc->dbg_flag > 2) {
3496: PetscInt ii;
3497: for (ii = 0; ii < B_neigs; ii++) {
3498: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N);
3499: for (j = 0; j < B_N; j++) {
3500: #if defined(PETSC_USE_COMPLEX)
3501: PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3502: PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3503: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c);
3504: #else
3505: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]));
3506: #endif
3507: }
3508: }
3509: }
3510: KSPGetOperators(sub_schurs->change[i], &change, NULL);
3511: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit);
3512: MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi);
3513: MatCopy(phi, phit, SAME_NONZERO_PATTERN);
3514: MatDestroy(&phit);
3515: MatDestroy(&phi);
3516: }
3517: maxneigs = PetscMax(B_neigs, maxneigs);
3518: pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3519: if (B_neigs) {
3520: PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size);
3522: if (pcbddc->dbg_flag > 1) {
3523: PetscInt ii;
3524: for (ii = 0; ii < B_neigs; ii++) {
3525: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N);
3526: for (j = 0; j < B_N; j++) {
3527: #if defined(PETSC_USE_COMPLEX)
3528: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3529: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3530: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c);
3531: #else
3532: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]));
3533: #endif
3534: }
3535: }
3536: }
3537: PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size);
3538: pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3539: pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3540: cum++;
3541: }
3542: ISRestoreIndices(sub_schurs->is_subs[i], &idxs);
3543: /* shift for next computation */
3544: cumarray += subset_size * subset_size;
3545: }
3546: if (pcbddc->dbg_flag) PetscViewerFlush(pcbddc->dbg_viewer);
3548: if (mss) {
3549: if (sub_schurs->gdsw) {
3550: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray);
3551: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray);
3552: } else {
3553: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray);
3554: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray);
3555: /* destroy matrices (junk) */
3556: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3557: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3558: }
3559: }
3560: if (allocated_S_St) PetscFree2(S, St);
3561: PetscFree5(eigv, eigs, work, B_iwork, B_ifail);
3562: #if defined(PETSC_USE_COMPLEX)
3563: PetscFree(rwork);
3564: #endif
3565: if (pcbddc->dbg_flag) {
3566: PetscInt maxneigs_r;
3567: MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc));
3568: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r);
3569: }
3570: PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0);
3571: return 0;
3572: }
3574: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3575: {
3576: PetscScalar *coarse_submat_vals;
3578: /* Setup local scatters R_to_B and (optionally) R_to_D */
3579: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3580: PCBDDCSetUpLocalScatters(pc);
3582: /* Setup local neumann solver ksp_R */
3583: /* PCBDDCSetUpLocalScatters should be called first! */
3584: PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE);
3586: /*
3587: Setup local correction and local part of coarse basis.
3588: Gives back the dense local part of the coarse matrix in column major ordering
3589: */
3590: PCBDDCSetUpCorrection(pc, &coarse_submat_vals);
3592: /* Compute total number of coarse nodes and setup coarse solver */
3593: PCBDDCSetUpCoarseSolver(pc, coarse_submat_vals);
3595: /* free */
3596: PetscFree(coarse_submat_vals);
3597: return 0;
3598: }
3600: PetscErrorCode PCBDDCResetCustomization(PC pc)
3601: {
3602: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3604: ISDestroy(&pcbddc->user_primal_vertices);
3605: ISDestroy(&pcbddc->user_primal_vertices_local);
3606: ISDestroy(&pcbddc->NeumannBoundaries);
3607: ISDestroy(&pcbddc->NeumannBoundariesLocal);
3608: ISDestroy(&pcbddc->DirichletBoundaries);
3609: MatNullSpaceDestroy(&pcbddc->onearnullspace);
3610: PetscFree(pcbddc->onearnullvecs_state);
3611: ISDestroy(&pcbddc->DirichletBoundariesLocal);
3612: PCBDDCSetDofsSplitting(pc, 0, NULL);
3613: PCBDDCSetDofsSplittingLocal(pc, 0, NULL);
3614: return 0;
3615: }
3617: PetscErrorCode PCBDDCResetTopography(PC pc)
3618: {
3619: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3620: PetscInt i;
3622: MatDestroy(&pcbddc->nedcG);
3623: ISDestroy(&pcbddc->nedclocal);
3624: MatDestroy(&pcbddc->discretegradient);
3625: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3626: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3627: MatDestroy(&pcbddc->switch_static_change);
3628: VecDestroy(&pcbddc->work_change);
3629: MatDestroy(&pcbddc->ConstraintMatrix);
3630: MatDestroy(&pcbddc->divudotp);
3631: ISDestroy(&pcbddc->divudotp_vl2l);
3632: PCBDDCGraphDestroy(&pcbddc->mat_graph);
3633: for (i = 0; i < pcbddc->n_local_subs; i++) ISDestroy(&pcbddc->local_subs[i]);
3634: pcbddc->n_local_subs = 0;
3635: PetscFree(pcbddc->local_subs);
3636: PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3637: pcbddc->graphanalyzed = PETSC_FALSE;
3638: pcbddc->recompute_topography = PETSC_TRUE;
3639: pcbddc->corner_selected = PETSC_FALSE;
3640: return 0;
3641: }
3643: PetscErrorCode PCBDDCResetSolvers(PC pc)
3644: {
3645: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3647: VecDestroy(&pcbddc->coarse_vec);
3648: if (pcbddc->coarse_phi_B) {
3649: PetscScalar *array;
3650: MatDenseGetArray(pcbddc->coarse_phi_B, &array);
3651: PetscFree(array);
3652: }
3653: MatDestroy(&pcbddc->coarse_phi_B);
3654: MatDestroy(&pcbddc->coarse_phi_D);
3655: MatDestroy(&pcbddc->coarse_psi_B);
3656: MatDestroy(&pcbddc->coarse_psi_D);
3657: VecDestroy(&pcbddc->vec1_P);
3658: VecDestroy(&pcbddc->vec1_C);
3659: MatDestroy(&pcbddc->local_auxmat2);
3660: MatDestroy(&pcbddc->local_auxmat1);
3661: VecDestroy(&pcbddc->vec1_R);
3662: VecDestroy(&pcbddc->vec2_R);
3663: ISDestroy(&pcbddc->is_R_local);
3664: VecScatterDestroy(&pcbddc->R_to_B);
3665: VecScatterDestroy(&pcbddc->R_to_D);
3666: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3667: KSPReset(pcbddc->ksp_D);
3668: KSPReset(pcbddc->ksp_R);
3669: KSPReset(pcbddc->coarse_ksp);
3670: MatDestroy(&pcbddc->local_mat);
3671: PetscFree(pcbddc->primal_indices_local_idxs);
3672: PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult);
3673: PetscFree(pcbddc->global_primal_indices);
3674: ISDestroy(&pcbddc->coarse_subassembling);
3675: MatDestroy(&pcbddc->benign_change);
3676: VecDestroy(&pcbddc->benign_vec);
3677: PCBDDCBenignShellMat(pc, PETSC_TRUE);
3678: MatDestroy(&pcbddc->benign_B0);
3679: PetscSFDestroy(&pcbddc->benign_sf);
3680: if (pcbddc->benign_zerodiag_subs) {
3681: PetscInt i;
3682: for (i = 0; i < pcbddc->benign_n; i++) ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3683: PetscFree(pcbddc->benign_zerodiag_subs);
3684: }
3685: PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0);
3686: return 0;
3687: }
3689: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3690: {
3691: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3692: PC_IS *pcis = (PC_IS *)pc->data;
3693: VecType impVecType;
3694: PetscInt n_constraints, n_R, old_size;
3696: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3697: n_R = pcis->n - pcbddc->n_vertices;
3698: VecGetType(pcis->vec1_N, &impVecType);
3699: /* local work vectors (try to avoid unneeded work)*/
3700: /* R nodes */
3701: old_size = -1;
3702: if (pcbddc->vec1_R) VecGetSize(pcbddc->vec1_R, &old_size);
3703: if (n_R != old_size) {
3704: VecDestroy(&pcbddc->vec1_R);
3705: VecDestroy(&pcbddc->vec2_R);
3706: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R);
3707: VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R);
3708: VecSetType(pcbddc->vec1_R, impVecType);
3709: VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R);
3710: }
3711: /* local primal dofs */
3712: old_size = -1;
3713: if (pcbddc->vec1_P) VecGetSize(pcbddc->vec1_P, &old_size);
3714: if (pcbddc->local_primal_size != old_size) {
3715: VecDestroy(&pcbddc->vec1_P);
3716: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P);
3717: VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size);
3718: VecSetType(pcbddc->vec1_P, impVecType);
3719: }
3720: /* local explicit constraints */
3721: old_size = -1;
3722: if (pcbddc->vec1_C) VecGetSize(pcbddc->vec1_C, &old_size);
3723: if (n_constraints && n_constraints != old_size) {
3724: VecDestroy(&pcbddc->vec1_C);
3725: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C);
3726: VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints);
3727: VecSetType(pcbddc->vec1_C, impVecType);
3728: }
3729: return 0;
3730: }
3732: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3733: {
3734: /* pointers to pcis and pcbddc */
3735: PC_IS *pcis = (PC_IS *)pc->data;
3736: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3737: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3738: /* submatrices of local problem */
3739: Mat A_RV, A_VR, A_VV, local_auxmat2_R;
3740: /* submatrices of local coarse problem */
3741: Mat S_VV, S_CV, S_VC, S_CC;
3742: /* working matrices */
3743: Mat C_CR;
3744: /* additional working stuff */
3745: PC pc_R;
3746: Mat F, Brhs = NULL;
3747: Vec dummy_vec;
3748: PetscBool isLU, isCHOL, need_benign_correction, sparserhs;
3749: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3750: PetscScalar *work;
3751: PetscInt *idx_V_B;
3752: PetscInt lda_rhs, n, n_vertices, n_constraints, *p0_lidx_I;
3753: PetscInt i, n_R, n_D, n_B;
3754: PetscScalar one = 1.0, m_one = -1.0;
3757: PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0);
3759: /* Set Non-overlapping dimensions */
3760: n_vertices = pcbddc->n_vertices;
3761: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3762: n_B = pcis->n_B;
3763: n_D = pcis->n - n_B;
3764: n_R = pcis->n - n_vertices;
3766: /* vertices in boundary numbering */
3767: PetscMalloc1(n_vertices, &idx_V_B);
3768: ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B);
3771: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3772: PetscCalloc1(pcbddc->local_primal_size * pcbddc->local_primal_size, &coarse_submat_vals);
3773: MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_vertices, coarse_submat_vals, &S_VV);
3774: MatDenseSetLDA(S_VV, pcbddc->local_primal_size);
3775: MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_vertices, coarse_submat_vals + n_vertices, &S_CV);
3776: MatDenseSetLDA(S_CV, pcbddc->local_primal_size);
3777: MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_constraints, coarse_submat_vals + pcbddc->local_primal_size * n_vertices, &S_VC);
3778: MatDenseSetLDA(S_VC, pcbddc->local_primal_size);
3779: MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_constraints, coarse_submat_vals + (pcbddc->local_primal_size + 1) * n_vertices, &S_CC);
3780: MatDenseSetLDA(S_CC, pcbddc->local_primal_size);
3782: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3783: KSPGetPC(pcbddc->ksp_R, &pc_R);
3784: PCSetUp(pc_R);
3785: PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU);
3786: PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL);
3787: lda_rhs = n_R;
3788: need_benign_correction = PETSC_FALSE;
3789: if (isLU || isCHOL) {
3790: PCFactorGetMatrix(pc_R, &F);
3791: } else if (sub_schurs && sub_schurs->reuse_solver) {
3792: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3793: MatFactorType type;
3795: F = reuse_solver->F;
3796: MatGetFactorType(F, &type);
3797: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3798: if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3799: MatGetSize(F, &lda_rhs, NULL);
3800: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3801: } else F = NULL;
3803: /* determine if we can use a sparse right-hand side */
3804: sparserhs = PETSC_FALSE;
3805: if (F) {
3806: MatSolverType solver;
3808: MatFactorGetSolverType(F, &solver);
3809: PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs);
3810: }
3812: /* allocate workspace */
3813: n = 0;
3814: if (n_constraints) n += lda_rhs * n_constraints;
3815: if (n_vertices) {
3816: n = PetscMax(2 * lda_rhs * n_vertices, n);
3817: n = PetscMax((lda_rhs + n_B) * n_vertices, n);
3818: }
3819: if (!pcbddc->symmetric_primal) n = PetscMax(2 * lda_rhs * pcbddc->local_primal_size, n);
3820: PetscMalloc1(n, &work);
3822: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3823: dummy_vec = NULL;
3824: if (need_benign_correction && lda_rhs != n_R && F) {
3825: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec);
3826: VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE);
3827: VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name);
3828: }
3830: MatDestroy(&pcbddc->local_auxmat1);
3831: MatDestroy(&pcbddc->local_auxmat2);
3833: /* Precompute stuffs needed for preprocessing and application of BDDC*/
3834: if (n_constraints) {
3835: Mat M3, C_B;
3836: IS is_aux;
3838: /* Extract constraints on R nodes: C_{CR} */
3839: ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_aux);
3840: MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR);
3841: MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B);
3843: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3844: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3845: if (!sparserhs) {
3846: PetscArrayzero(work, lda_rhs * n_constraints);
3847: for (i = 0; i < n_constraints; i++) {
3848: const PetscScalar *row_cmat_values;
3849: const PetscInt *row_cmat_indices;
3850: PetscInt size_of_constraint, j;
3852: MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values);
3853: for (j = 0; j < size_of_constraint; j++) work[row_cmat_indices[j] + i * lda_rhs] = -row_cmat_values[j];
3854: MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values);
3855: }
3856: MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &Brhs);
3857: } else {
3858: Mat tC_CR;
3860: MatScale(C_CR, -1.0);
3861: if (lda_rhs != n_R) {
3862: PetscScalar *aa;
3863: PetscInt r, *ii, *jj;
3864: PetscBool done;
3866: MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done);
3868: MatSeqAIJGetArray(C_CR, &aa);
3869: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR);
3870: MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done);
3872: } else {
3873: PetscObjectReference((PetscObject)C_CR);
3874: tC_CR = C_CR;
3875: }
3876: MatCreateTranspose(tC_CR, &Brhs);
3877: MatDestroy(&tC_CR);
3878: }
3879: MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, NULL, &local_auxmat2_R);
3880: if (F) {
3881: if (need_benign_correction) {
3882: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3884: /* rhs is already zero on interior dofs, no need to change the rhs */
3885: PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n);
3886: }
3887: MatMatSolve(F, Brhs, local_auxmat2_R);
3888: if (need_benign_correction) {
3889: PetscScalar *marr;
3890: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3892: MatDenseGetArray(local_auxmat2_R, &marr);
3893: if (lda_rhs != n_R) {
3894: for (i = 0; i < n_constraints; i++) {
3895: VecPlaceArray(dummy_vec, marr + i * lda_rhs);
3896: PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE);
3897: VecResetArray(dummy_vec);
3898: }
3899: } else {
3900: for (i = 0; i < n_constraints; i++) {
3901: VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs);
3902: PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE);
3903: VecResetArray(pcbddc->vec1_R);
3904: }
3905: }
3906: MatDenseRestoreArray(local_auxmat2_R, &marr);
3907: }
3908: } else {
3909: PetscScalar *marr;
3911: MatDenseGetArray(local_auxmat2_R, &marr);
3912: for (i = 0; i < n_constraints; i++) {
3913: VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs);
3914: VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs);
3915: KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R);
3916: KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R);
3917: VecResetArray(pcbddc->vec1_R);
3918: VecResetArray(pcbddc->vec2_R);
3919: }
3920: MatDenseRestoreArray(local_auxmat2_R, &marr);
3921: }
3922: if (sparserhs) MatScale(C_CR, -1.0);
3923: MatDestroy(&Brhs);
3924: if (!pcbddc->switch_static) {
3925: MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_constraints, NULL, &pcbddc->local_auxmat2);
3926: for (i = 0; i < n_constraints; i++) {
3927: Vec r, b;
3928: MatDenseGetColumnVecRead(local_auxmat2_R, i, &r);
3929: MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b);
3930: VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD);
3931: VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD);
3932: MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b);
3933: MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r);
3934: }
3935: MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3);
3936: } else {
3937: if (lda_rhs != n_R) {
3938: IS dummy;
3940: ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &dummy);
3941: MatCreateSubMatrix(local_auxmat2_R, dummy, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2);
3942: ISDestroy(&dummy);
3943: } else {
3944: PetscObjectReference((PetscObject)local_auxmat2_R);
3945: pcbddc->local_auxmat2 = local_auxmat2_R;
3946: }
3947: MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3);
3948: }
3949: ISDestroy(&is_aux);
3950: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */
3951: MatScale(M3, m_one);
3952: if (isCHOL) {
3953: MatCholeskyFactor(M3, NULL, NULL);
3954: } else {
3955: MatLUFactor(M3, NULL, NULL, NULL);
3956: }
3957: MatSeqDenseInvertFactors_Private(M3);
3958: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
3959: MatMatMult(M3, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1);
3960: MatDestroy(&C_B);
3961: MatCopy(M3, S_CC, SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
3962: MatDestroy(&M3);
3963: }
3965: /* Get submatrices from subdomain matrix */
3966: if (n_vertices) {
3967: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
3968: PetscBool oldpin;
3969: #endif
3970: PetscBool isaij;
3971: IS is_aux;
3973: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
3974: IS tis;
3976: ISDuplicate(pcbddc->is_R_local, &tis);
3977: ISSort(tis);
3978: ISComplement(tis, 0, pcis->n, &is_aux);
3979: ISDestroy(&tis);
3980: } else {
3981: ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux);
3982: }
3983: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
3984: oldpin = pcbddc->local_mat->boundtocpu;
3985: #endif
3986: MatBindToCPU(pcbddc->local_mat, PETSC_TRUE);
3987: MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV);
3988: MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR);
3989: PetscObjectBaseTypeCompare((PetscObject)A_VR, MATSEQAIJ, &isaij);
3990: if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
3991: MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR);
3992: }
3993: MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV);
3994: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
3995: MatBindToCPU(pcbddc->local_mat, oldpin);
3996: #endif
3997: ISDestroy(&is_aux);
3998: }
4000: /* Matrix of coarse basis functions (local) */
4001: if (pcbddc->coarse_phi_B) {
4002: PetscInt on_B, on_primal, on_D = n_D;
4003: if (pcbddc->coarse_phi_D) MatGetSize(pcbddc->coarse_phi_D, &on_D, NULL);
4004: MatGetSize(pcbddc->coarse_phi_B, &on_B, &on_primal);
4005: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4006: PetscScalar *marray;
4008: MatDenseGetArray(pcbddc->coarse_phi_B, &marray);
4009: PetscFree(marray);
4010: MatDestroy(&pcbddc->coarse_phi_B);
4011: MatDestroy(&pcbddc->coarse_psi_B);
4012: MatDestroy(&pcbddc->coarse_phi_D);
4013: MatDestroy(&pcbddc->coarse_psi_D);
4014: }
4015: }
4017: if (!pcbddc->coarse_phi_B) {
4018: PetscScalar *marr;
4020: /* memory size */
4021: n = n_B * pcbddc->local_primal_size;
4022: if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D * pcbddc->local_primal_size;
4023: if (!pcbddc->symmetric_primal) n *= 2;
4024: PetscCalloc1(n, &marr);
4025: MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_B);
4026: marr += n_B * pcbddc->local_primal_size;
4027: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4028: MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_D);
4029: marr += n_D * pcbddc->local_primal_size;
4030: }
4031: if (!pcbddc->symmetric_primal) {
4032: MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_B);
4033: marr += n_B * pcbddc->local_primal_size;
4034: if (pcbddc->switch_static || pcbddc->dbg_flag) MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_D);
4035: } else {
4036: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4037: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4038: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4039: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4040: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4041: }
4042: }
4043: }
4045: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4046: p0_lidx_I = NULL;
4047: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4048: const PetscInt *idxs;
4050: ISGetIndices(pcis->is_I_local, &idxs);
4051: PetscMalloc1(pcbddc->benign_n, &p0_lidx_I);
4052: for (i = 0; i < pcbddc->benign_n; i++) PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]);
4053: ISRestoreIndices(pcis->is_I_local, &idxs);
4054: }
4056: /* vertices */
4057: if (n_vertices) {
4058: PetscBool restoreavr = PETSC_FALSE;
4060: MatConvert(A_VV, MATDENSE, MAT_INPLACE_MATRIX, &A_VV);
4062: if (n_R) {
4063: Mat A_RRmA_RV, A_RV_bcorr = NULL, S_VVt; /* S_VVt with LDA=N */
4064: PetscBLASInt B_N, B_one = 1;
4065: const PetscScalar *x;
4066: PetscScalar *y;
4068: MatScale(A_RV, m_one);
4069: if (need_benign_correction) {
4070: ISLocalToGlobalMapping RtoN;
4071: IS is_p0;
4072: PetscInt *idxs_p0, n;
4074: PetscMalloc1(pcbddc->benign_n, &idxs_p0);
4075: ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN);
4076: ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0);
4078: ISLocalToGlobalMappingDestroy(&RtoN);
4079: ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0);
4080: MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr);
4081: ISDestroy(&is_p0);
4082: }
4084: MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work, &A_RRmA_RV);
4085: if (!sparserhs || need_benign_correction) {
4086: if (lda_rhs == n_R) {
4087: MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV);
4088: } else {
4089: PetscScalar *av, *array;
4090: const PetscInt *xadj, *adjncy;
4091: PetscInt n;
4092: PetscBool flg_row;
4094: array = work + lda_rhs * n_vertices;
4095: PetscArrayzero(array, lda_rhs * n_vertices);
4096: MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV);
4097: MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row);
4098: MatSeqAIJGetArray(A_RV, &av);
4099: for (i = 0; i < n; i++) {
4100: PetscInt j;
4101: for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * adjncy[j] + i] = av[j];
4102: }
4103: MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row);
4104: MatDestroy(&A_RV);
4105: MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, array, &A_RV);
4106: }
4107: if (need_benign_correction) {
4108: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4109: PetscScalar *marr;
4111: MatDenseGetArray(A_RV, &marr);
4112: /* need \Phi^T A_RV = (I+L)A_RV, L given by
4114: | 0 0 0 | (V)
4115: L = | 0 0 -1 | (P-p0)
4116: | 0 0 -1 | (p0)
4118: */
4119: for (i = 0; i < reuse_solver->benign_n; i++) {
4120: const PetscScalar *vals;
4121: const PetscInt *idxs, *idxs_zero;
4122: PetscInt n, j, nz;
4124: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz);
4125: ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero);
4126: MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals);
4127: for (j = 0; j < n; j++) {
4128: PetscScalar val = vals[j];
4129: PetscInt k, col = idxs[j];
4130: for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4131: }
4132: MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals);
4133: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero);
4134: }
4135: MatDenseRestoreArray(A_RV, &marr);
4136: }
4137: PetscObjectReference((PetscObject)A_RV);
4138: Brhs = A_RV;
4139: } else {
4140: Mat tA_RVT, A_RVT;
4142: if (!pcbddc->symmetric_primal) {
4143: /* A_RV already scaled by -1 */
4144: MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT);
4145: } else {
4146: restoreavr = PETSC_TRUE;
4147: MatScale(A_VR, -1.0);
4148: PetscObjectReference((PetscObject)A_VR);
4149: A_RVT = A_VR;
4150: }
4151: if (lda_rhs != n_R) {
4152: PetscScalar *aa;
4153: PetscInt r, *ii, *jj;
4154: PetscBool done;
4156: MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done);
4158: MatSeqAIJGetArray(A_RVT, &aa);
4159: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT);
4160: MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done);
4162: } else {
4163: PetscObjectReference((PetscObject)A_RVT);
4164: tA_RVT = A_RVT;
4165: }
4166: MatCreateTranspose(tA_RVT, &Brhs);
4167: MatDestroy(&tA_RVT);
4168: MatDestroy(&A_RVT);
4169: }
4170: if (F) {
4171: /* need to correct the rhs */
4172: if (need_benign_correction) {
4173: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4174: PetscScalar *marr;
4176: MatDenseGetArray(Brhs, &marr);
4177: if (lda_rhs != n_R) {
4178: for (i = 0; i < n_vertices; i++) {
4179: VecPlaceArray(dummy_vec, marr + i * lda_rhs);
4180: PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE);
4181: VecResetArray(dummy_vec);
4182: }
4183: } else {
4184: for (i = 0; i < n_vertices; i++) {
4185: VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs);
4186: PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE);
4187: VecResetArray(pcbddc->vec1_R);
4188: }
4189: }
4190: MatDenseRestoreArray(Brhs, &marr);
4191: }
4192: MatMatSolve(F, Brhs, A_RRmA_RV);
4193: if (restoreavr) MatScale(A_VR, -1.0);
4194: /* need to correct the solution */
4195: if (need_benign_correction) {
4196: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4197: PetscScalar *marr;
4199: MatDenseGetArray(A_RRmA_RV, &marr);
4200: if (lda_rhs != n_R) {
4201: for (i = 0; i < n_vertices; i++) {
4202: VecPlaceArray(dummy_vec, marr + i * lda_rhs);
4203: PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE);
4204: VecResetArray(dummy_vec);
4205: }
4206: } else {
4207: for (i = 0; i < n_vertices; i++) {
4208: VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs);
4209: PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE);
4210: VecResetArray(pcbddc->vec1_R);
4211: }
4212: }
4213: MatDenseRestoreArray(A_RRmA_RV, &marr);
4214: }
4215: } else {
4216: MatDenseGetArray(Brhs, &y);
4217: for (i = 0; i < n_vertices; i++) {
4218: VecPlaceArray(pcbddc->vec1_R, y + i * lda_rhs);
4219: VecPlaceArray(pcbddc->vec2_R, work + i * lda_rhs);
4220: KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R);
4221: KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R);
4222: VecResetArray(pcbddc->vec1_R);
4223: VecResetArray(pcbddc->vec2_R);
4224: }
4225: MatDenseRestoreArray(Brhs, &y);
4226: }
4227: MatDestroy(&A_RV);
4228: MatDestroy(&Brhs);
4229: /* S_VV and S_CV */
4230: if (n_constraints) {
4231: Mat B;
4233: PetscArrayzero(work + lda_rhs * n_vertices, n_B * n_vertices);
4234: for (i = 0; i < n_vertices; i++) {
4235: VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs);
4236: VecPlaceArray(pcis->vec1_B, work + lda_rhs * n_vertices + i * n_B);
4237: VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
4238: VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
4239: VecResetArray(pcis->vec1_B);
4240: VecResetArray(pcbddc->vec1_R);
4241: }
4242: MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_vertices, work + lda_rhs * n_vertices, &B);
4243: /* Reuse dense S_C = pcbddc->local_auxmat1 * B */
4244: MatProductCreateWithMat(pcbddc->local_auxmat1, B, NULL, S_CV);
4245: MatProductSetType(S_CV, MATPRODUCT_AB);
4246: MatProductSetFromOptions(S_CV);
4247: MatProductSymbolic(S_CV);
4248: MatProductNumeric(S_CV);
4249: MatProductClear(S_CV);
4251: MatDestroy(&B);
4252: MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work + lda_rhs * n_vertices, &B);
4253: /* Reuse B = local_auxmat2_R * S_CV */
4254: MatProductCreateWithMat(local_auxmat2_R, S_CV, NULL, B);
4255: MatProductSetType(B, MATPRODUCT_AB);
4256: MatProductSetFromOptions(B);
4257: MatProductSymbolic(B);
4258: MatProductNumeric(B);
4260: MatScale(S_CV, m_one);
4261: PetscBLASIntCast(lda_rhs * n_vertices, &B_N);
4262: PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, work + lda_rhs * n_vertices, &B_one, work, &B_one));
4263: MatDestroy(&B);
4264: }
4265: if (lda_rhs != n_R) {
4266: MatDestroy(&A_RRmA_RV);
4267: MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, work, &A_RRmA_RV);
4268: MatDenseSetLDA(A_RRmA_RV, lda_rhs);
4269: }
4270: MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VVt);
4271: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4272: if (need_benign_correction) {
4273: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4274: PetscScalar *marr, *sums;
4276: PetscMalloc1(n_vertices, &sums);
4277: MatDenseGetArray(S_VVt, &marr);
4278: for (i = 0; i < reuse_solver->benign_n; i++) {
4279: const PetscScalar *vals;
4280: const PetscInt *idxs, *idxs_zero;
4281: PetscInt n, j, nz;
4283: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz);
4284: ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero);
4285: for (j = 0; j < n_vertices; j++) {
4286: PetscInt k;
4287: sums[j] = 0.;
4288: for (k = 0; k < nz; k++) sums[j] += work[idxs_zero[k] + j * lda_rhs];
4289: }
4290: MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals);
4291: for (j = 0; j < n; j++) {
4292: PetscScalar val = vals[j];
4293: PetscInt k;
4294: for (k = 0; k < n_vertices; k++) marr[idxs[j] + k * n_vertices] += val * sums[k];
4295: }
4296: MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals);
4297: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero);
4298: }
4299: PetscFree(sums);
4300: MatDenseRestoreArray(S_VVt, &marr);
4301: MatDestroy(&A_RV_bcorr);
4302: }
4303: MatDestroy(&A_RRmA_RV);
4304: PetscBLASIntCast(n_vertices * n_vertices, &B_N);
4305: MatDenseGetArrayRead(A_VV, &x);
4306: MatDenseGetArray(S_VVt, &y);
4307: PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, x, &B_one, y, &B_one));
4308: MatDenseRestoreArrayRead(A_VV, &x);
4309: MatDenseRestoreArray(S_VVt, &y);
4310: MatCopy(S_VVt, S_VV, SAME_NONZERO_PATTERN);
4311: MatDestroy(&S_VVt);
4312: } else {
4313: MatCopy(A_VV, S_VV, SAME_NONZERO_PATTERN);
4314: }
4315: MatDestroy(&A_VV);
4317: /* coarse basis functions */
4318: for (i = 0; i < n_vertices; i++) {
4319: Vec v;
4320: PetscScalar one = 1.0, zero = 0.0;
4322: VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i);
4323: MatDenseGetColumnVec(pcbddc->coarse_phi_B, i, &v);
4324: VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4325: VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4326: if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4327: PetscMPIInt rank;
4328: MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), &rank);
4330: }
4331: VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES);
4332: VecAssemblyBegin(v); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */
4333: VecAssemblyEnd(v);
4334: MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i, &v);
4336: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4337: PetscInt j;
4339: MatDenseGetColumnVec(pcbddc->coarse_phi_D, i, &v);
4340: VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4341: VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4342: if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4343: PetscMPIInt rank;
4344: MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), &rank);
4346: }
4347: for (j = 0; j < pcbddc->benign_n; j++) VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES);
4348: VecAssemblyBegin(v);
4349: VecAssemblyEnd(v);
4350: MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i, &v);
4351: }
4352: VecResetArray(pcbddc->vec1_R);
4353: }
4354: /* if n_R == 0 the object is not destroyed */
4355: MatDestroy(&A_RV);
4356: }
4357: VecDestroy(&dummy_vec);
4359: if (n_constraints) {
4360: Mat B;
4362: MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &B);
4363: MatScale(S_CC, m_one);
4364: MatProductCreateWithMat(local_auxmat2_R, S_CC, NULL, B);
4365: MatProductSetType(B, MATPRODUCT_AB);
4366: MatProductSetFromOptions(B);
4367: MatProductSymbolic(B);
4368: MatProductNumeric(B);
4370: MatScale(S_CC, m_one);
4371: if (n_vertices) {
4372: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4373: MatTransposeSetPrecursor(S_CV, S_VC);
4374: MatTranspose(S_CV, MAT_REUSE_MATRIX, &S_VC);
4375: } else {
4376: Mat S_VCt;
4378: if (lda_rhs != n_R) {
4379: MatDestroy(&B);
4380: MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_constraints, work, &B);
4381: MatDenseSetLDA(B, lda_rhs);
4382: }
4383: MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VCt);
4384: MatCopy(S_VCt, S_VC, SAME_NONZERO_PATTERN);
4385: MatDestroy(&S_VCt);
4386: }
4387: }
4388: MatDestroy(&B);
4389: /* coarse basis functions */
4390: for (i = 0; i < n_constraints; i++) {
4391: Vec v;
4393: VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i);
4394: MatDenseGetColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v);
4395: VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4396: VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4397: MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v);
4398: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4399: PetscInt j;
4400: PetscScalar zero = 0.0;
4401: MatDenseGetColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v);
4402: VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4403: VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4404: for (j = 0; j < pcbddc->benign_n; j++) VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES);
4405: VecAssemblyBegin(v);
4406: VecAssemblyEnd(v);
4407: MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v);
4408: }
4409: VecResetArray(pcbddc->vec1_R);
4410: }
4411: }
4412: if (n_constraints) MatDestroy(&local_auxmat2_R);
4413: PetscFree(p0_lidx_I);
4415: /* coarse matrix entries relative to B_0 */
4416: if (pcbddc->benign_n) {
4417: Mat B0_B, B0_BPHI;
4418: IS is_dummy;
4419: const PetscScalar *data;
4420: PetscInt j;
4422: ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy);
4423: MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B);
4424: ISDestroy(&is_dummy);
4425: MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI);
4426: MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI);
4427: MatDenseGetArrayRead(B0_BPHI, &data);
4428: for (j = 0; j < pcbddc->benign_n; j++) {
4429: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4430: for (i = 0; i < pcbddc->local_primal_size; i++) {
4431: coarse_submat_vals[primal_idx * pcbddc->local_primal_size + i] = data[i * pcbddc->benign_n + j];
4432: coarse_submat_vals[i * pcbddc->local_primal_size + primal_idx] = data[i * pcbddc->benign_n + j];
4433: }
4434: }
4435: MatDenseRestoreArrayRead(B0_BPHI, &data);
4436: MatDestroy(&B0_B);
4437: MatDestroy(&B0_BPHI);
4438: }
4440: /* compute other basis functions for non-symmetric problems */
4441: if (!pcbddc->symmetric_primal) {
4442: Mat B_V = NULL, B_C = NULL;
4443: PetscScalar *marray;
4445: if (n_constraints) {
4446: Mat S_CCT, C_CRT;
4448: MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT);
4449: MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT);
4450: MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C);
4451: MatDestroy(&S_CCT);
4452: if (n_vertices) {
4453: Mat S_VCT;
4455: MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT);
4456: MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V);
4457: MatDestroy(&S_VCT);
4458: }
4459: MatDestroy(&C_CRT);
4460: } else {
4461: MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V);
4462: }
4463: if (n_vertices && n_R) {
4464: PetscScalar *av, *marray;
4465: const PetscInt *xadj, *adjncy;
4466: PetscInt n;
4467: PetscBool flg_row;
4469: /* B_V = B_V - A_VR^T */
4470: MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR);
4471: MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row);
4472: MatSeqAIJGetArray(A_VR, &av);
4473: MatDenseGetArray(B_V, &marray);
4474: for (i = 0; i < n; i++) {
4475: PetscInt j;
4476: for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
4477: }
4478: MatDenseRestoreArray(B_V, &marray);
4479: MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row);
4480: MatDestroy(&A_VR);
4481: }
4483: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4484: if (n_vertices) {
4485: MatDenseGetArray(B_V, &marray);
4486: for (i = 0; i < n_vertices; i++) {
4487: VecPlaceArray(pcbddc->vec1_R, marray + i * n_R);
4488: VecPlaceArray(pcbddc->vec2_R, work + i * n_R);
4489: KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R);
4490: KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R);
4491: VecResetArray(pcbddc->vec1_R);
4492: VecResetArray(pcbddc->vec2_R);
4493: }
4494: MatDenseRestoreArray(B_V, &marray);
4495: }
4496: if (B_C) {
4497: MatDenseGetArray(B_C, &marray);
4498: for (i = n_vertices; i < n_constraints + n_vertices; i++) {
4499: VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R);
4500: VecPlaceArray(pcbddc->vec2_R, work + i * n_R);
4501: KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R);
4502: KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R);
4503: VecResetArray(pcbddc->vec1_R);
4504: VecResetArray(pcbddc->vec2_R);
4505: }
4506: MatDenseRestoreArray(B_C, &marray);
4507: }
4508: /* coarse basis functions */
4509: for (i = 0; i < pcbddc->local_primal_size; i++) {
4510: Vec v;
4512: VecPlaceArray(pcbddc->vec1_R, work + i * n_R);
4513: MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v);
4514: VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4515: VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4516: if (i < n_vertices) {
4517: PetscScalar one = 1.0;
4518: VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES);
4519: VecAssemblyBegin(v);
4520: VecAssemblyEnd(v);
4521: }
4522: MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v);
4524: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4525: MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v);
4526: VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4527: VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4528: MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v);
4529: }
4530: VecResetArray(pcbddc->vec1_R);
4531: }
4532: MatDestroy(&B_V);
4533: MatDestroy(&B_C);
4534: }
4536: /* free memory */
4537: PetscFree(idx_V_B);
4538: MatDestroy(&S_VV);
4539: MatDestroy(&S_CV);
4540: MatDestroy(&S_VC);
4541: MatDestroy(&S_CC);
4542: PetscFree(work);
4543: if (n_vertices) MatDestroy(&A_VR);
4544: if (n_constraints) MatDestroy(&C_CR);
4545: PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0);
4547: /* Checking coarse_sub_mat and coarse basis functions */
4548: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4549: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4550: if (pcbddc->dbg_flag) {
4551: Mat coarse_sub_mat;
4552: Mat AUXMAT, TM1, TM2, TM3, TM4;
4553: Mat coarse_phi_D, coarse_phi_B;
4554: Mat coarse_psi_D, coarse_psi_B;
4555: Mat A_II, A_BB, A_IB, A_BI;
4556: Mat C_B, CPHI;
4557: IS is_dummy;
4558: Vec mones;
4559: MatType checkmattype = MATSEQAIJ;
4560: PetscReal real_value;
4562: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4563: Mat A;
4564: PCBDDCBenignProject(pc, NULL, NULL, &A);
4565: MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II);
4566: MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB);
4567: MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI);
4568: MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB);
4569: MatDestroy(&A);
4570: } else {
4571: MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II);
4572: MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB);
4573: MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI);
4574: MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB);
4575: }
4576: MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D);
4577: MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B);
4578: if (!pcbddc->symmetric_primal) {
4579: MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D);
4580: MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B);
4581: }
4582: MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_sub_mat);
4584: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
4585: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal);
4586: PetscViewerFlush(pcbddc->dbg_viewer);
4587: if (!pcbddc->symmetric_primal) {
4588: MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4589: MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1);
4590: MatDestroy(&AUXMAT);
4591: MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4592: MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2);
4593: MatDestroy(&AUXMAT);
4594: MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4595: MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3);
4596: MatDestroy(&AUXMAT);
4597: MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4598: MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4);
4599: MatDestroy(&AUXMAT);
4600: } else {
4601: MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1);
4602: MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2);
4603: MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4604: MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3);
4605: MatDestroy(&AUXMAT);
4606: MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4607: MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4);
4608: MatDestroy(&AUXMAT);
4609: }
4610: MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN);
4611: MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN);
4612: MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN);
4613: MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1);
4614: if (pcbddc->benign_n) {
4615: Mat B0_B, B0_BPHI;
4616: const PetscScalar *data2;
4617: PetscScalar *data;
4618: PetscInt j;
4620: ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy);
4621: MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B);
4622: MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI);
4623: MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI);
4624: MatDenseGetArray(TM1, &data);
4625: MatDenseGetArrayRead(B0_BPHI, &data2);
4626: for (j = 0; j < pcbddc->benign_n; j++) {
4627: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4628: for (i = 0; i < pcbddc->local_primal_size; i++) {
4629: data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
4630: data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
4631: }
4632: }
4633: MatDenseRestoreArray(TM1, &data);
4634: MatDenseRestoreArrayRead(B0_BPHI, &data2);
4635: MatDestroy(&B0_B);
4636: ISDestroy(&is_dummy);
4637: MatDestroy(&B0_BPHI);
4638: }
4639: #if 0
4640: {
4641: PetscViewer viewer;
4642: char filename[256];
4643: sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4644: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4645: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4646: PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4647: MatView(coarse_sub_mat,viewer);
4648: PetscObjectSetName((PetscObject)TM1,"projected");
4649: MatView(TM1,viewer);
4650: if (pcbddc->coarse_phi_B) {
4651: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4652: MatView(pcbddc->coarse_phi_B,viewer);
4653: }
4654: if (pcbddc->coarse_phi_D) {
4655: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4656: MatView(pcbddc->coarse_phi_D,viewer);
4657: }
4658: if (pcbddc->coarse_psi_B) {
4659: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4660: MatView(pcbddc->coarse_psi_B,viewer);
4661: }
4662: if (pcbddc->coarse_psi_D) {
4663: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4664: MatView(pcbddc->coarse_psi_D,viewer);
4665: }
4666: PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4667: MatView(pcbddc->local_mat,viewer);
4668: PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4669: MatView(pcbddc->ConstraintMatrix,viewer);
4670: PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4671: ISView(pcis->is_I_local,viewer);
4672: PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4673: ISView(pcis->is_B_local,viewer);
4674: PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4675: ISView(pcbddc->is_R_local,viewer);
4676: PetscViewerDestroy(&viewer);
4677: }
4678: #endif
4679: MatAXPY(TM1, m_one, coarse_sub_mat, DIFFERENT_NONZERO_PATTERN);
4680: MatNorm(TM1, NORM_FROBENIUS, &real_value);
4681: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4682: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value);
4684: /* check constraints */
4685: ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy);
4686: MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B);
4687: if (!pcbddc->benign_n) { /* TODO: add benign case */
4688: MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI);
4689: } else {
4690: PetscScalar *data;
4691: Mat tmat;
4692: MatDenseGetArray(pcbddc->coarse_phi_B, &data);
4693: MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat);
4694: MatDenseRestoreArray(pcbddc->coarse_phi_B, &data);
4695: MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI);
4696: MatDestroy(&tmat);
4697: }
4698: MatCreateVecs(CPHI, &mones, NULL);
4699: VecSet(mones, -1.0);
4700: MatDiagonalSet(CPHI, mones, ADD_VALUES);
4701: MatNorm(CPHI, NORM_FROBENIUS, &real_value);
4702: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value);
4703: if (!pcbddc->symmetric_primal) {
4704: MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI);
4705: VecSet(mones, -1.0);
4706: MatDiagonalSet(CPHI, mones, ADD_VALUES);
4707: MatNorm(CPHI, NORM_FROBENIUS, &real_value);
4708: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value);
4709: }
4710: MatDestroy(&C_B);
4711: MatDestroy(&CPHI);
4712: ISDestroy(&is_dummy);
4713: VecDestroy(&mones);
4714: PetscViewerFlush(pcbddc->dbg_viewer);
4715: MatDestroy(&A_II);
4716: MatDestroy(&A_BB);
4717: MatDestroy(&A_IB);
4718: MatDestroy(&A_BI);
4719: MatDestroy(&TM1);
4720: MatDestroy(&TM2);
4721: MatDestroy(&TM3);
4722: MatDestroy(&TM4);
4723: MatDestroy(&coarse_phi_D);
4724: MatDestroy(&coarse_phi_B);
4725: if (!pcbddc->symmetric_primal) {
4726: MatDestroy(&coarse_psi_D);
4727: MatDestroy(&coarse_psi_B);
4728: }
4729: MatDestroy(&coarse_sub_mat);
4730: }
4731: /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */
4732: {
4733: PetscBool gpu;
4735: PetscObjectTypeCompare((PetscObject)pcis->vec1_N, VECSEQCUDA, &gpu);
4736: if (gpu) {
4737: if (pcbddc->local_auxmat1) MatConvert(pcbddc->local_auxmat1, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1);
4738: if (pcbddc->local_auxmat2) MatConvert(pcbddc->local_auxmat2, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2);
4739: if (pcbddc->coarse_phi_B) MatConvert(pcbddc->coarse_phi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B);
4740: if (pcbddc->coarse_phi_D) MatConvert(pcbddc->coarse_phi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D);
4741: if (pcbddc->coarse_psi_B) MatConvert(pcbddc->coarse_psi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B);
4742: if (pcbddc->coarse_psi_D) MatConvert(pcbddc->coarse_psi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D);
4743: }
4744: }
4745: /* get back data */
4746: *coarse_submat_vals_n = coarse_submat_vals;
4747: return 0;
4748: }
4750: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
4751: {
4752: Mat *work_mat;
4753: IS isrow_s, iscol_s;
4754: PetscBool rsorted, csorted;
4755: PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
4757: ISSorted(isrow, &rsorted);
4758: ISSorted(iscol, &csorted);
4759: ISGetLocalSize(isrow, &rsize);
4760: ISGetLocalSize(iscol, &csize);
4762: if (!rsorted) {
4763: const PetscInt *idxs;
4764: PetscInt *idxs_sorted, i;
4766: PetscMalloc1(rsize, &idxs_perm_r);
4767: PetscMalloc1(rsize, &idxs_sorted);
4768: for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
4769: ISGetIndices(isrow, &idxs);
4770: PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r);
4771: for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
4772: ISRestoreIndices(isrow, &idxs);
4773: ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s);
4774: } else {
4775: PetscObjectReference((PetscObject)isrow);
4776: isrow_s = isrow;
4777: }
4779: if (!csorted) {
4780: if (isrow == iscol) {
4781: PetscObjectReference((PetscObject)isrow_s);
4782: iscol_s = isrow_s;
4783: } else {
4784: const PetscInt *idxs;
4785: PetscInt *idxs_sorted, i;
4787: PetscMalloc1(csize, &idxs_perm_c);
4788: PetscMalloc1(csize, &idxs_sorted);
4789: for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
4790: ISGetIndices(iscol, &idxs);
4791: PetscSortIntWithPermutation(csize, idxs, idxs_perm_c);
4792: for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
4793: ISRestoreIndices(iscol, &idxs);
4794: ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s);
4795: }
4796: } else {
4797: PetscObjectReference((PetscObject)iscol);
4798: iscol_s = iscol;
4799: }
4801: MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat);
4803: if (!rsorted || !csorted) {
4804: Mat new_mat;
4805: IS is_perm_r, is_perm_c;
4807: if (!rsorted) {
4808: PetscInt *idxs_r, i;
4809: PetscMalloc1(rsize, &idxs_r);
4810: for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
4811: PetscFree(idxs_perm_r);
4812: ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r);
4813: } else {
4814: ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r);
4815: }
4816: ISSetPermutation(is_perm_r);
4818: if (!csorted) {
4819: if (isrow_s == iscol_s) {
4820: PetscObjectReference((PetscObject)is_perm_r);
4821: is_perm_c = is_perm_r;
4822: } else {
4823: PetscInt *idxs_c, i;
4825: PetscMalloc1(csize, &idxs_c);
4826: for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
4827: PetscFree(idxs_perm_c);
4828: ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c);
4829: }
4830: } else {
4831: ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c);
4832: }
4833: ISSetPermutation(is_perm_c);
4835: MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat);
4836: MatDestroy(&work_mat[0]);
4837: work_mat[0] = new_mat;
4838: ISDestroy(&is_perm_r);
4839: ISDestroy(&is_perm_c);
4840: }
4842: PetscObjectReference((PetscObject)work_mat[0]);
4843: *B = work_mat[0];
4844: MatDestroyMatrices(1, &work_mat);
4845: ISDestroy(&isrow_s);
4846: ISDestroy(&iscol_s);
4847: return 0;
4848: }
4850: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4851: {
4852: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
4853: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4854: Mat new_mat, lA;
4855: IS is_local, is_global;
4856: PetscInt local_size;
4857: PetscBool isseqaij, issym, isset;
4859: MatDestroy(&pcbddc->local_mat);
4860: MatGetSize(matis->A, &local_size, NULL);
4861: ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local);
4862: ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global);
4863: ISDestroy(&is_local);
4864: MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat);
4865: ISDestroy(&is_global);
4867: if (pcbddc->dbg_flag) {
4868: Vec x, x_change;
4869: PetscReal error;
4871: MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change);
4872: VecSetRandom(x, NULL);
4873: MatMult(ChangeOfBasisMatrix, x, x_change);
4874: VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD);
4875: VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD);
4876: MatMult(new_mat, matis->x, matis->y);
4877: if (!pcbddc->change_interior) {
4878: const PetscScalar *x, *y, *v;
4879: PetscReal lerror = 0.;
4880: PetscInt i;
4882: VecGetArrayRead(matis->x, &x);
4883: VecGetArrayRead(matis->y, &y);
4884: VecGetArrayRead(matis->counter, &v);
4885: for (i = 0; i < local_size; i++)
4886: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
4887: VecRestoreArrayRead(matis->x, &x);
4888: VecRestoreArrayRead(matis->y, &y);
4889: VecRestoreArrayRead(matis->counter, &v);
4890: MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc));
4891: if (error > PETSC_SMALL) {
4892: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4893: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
4894: } else {
4895: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
4896: }
4897: }
4898: }
4899: VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE);
4900: VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE);
4901: VecAXPY(x, -1.0, x_change);
4902: VecNorm(x, NORM_INFINITY, &error);
4903: if (error > PETSC_SMALL) {
4904: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4905: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
4906: } else {
4907: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
4908: }
4909: }
4910: VecDestroy(&x);
4911: VecDestroy(&x_change);
4912: }
4914: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4915: PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA);
4917: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4918: PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij);
4919: if (isseqaij) {
4920: MatDestroy(&pcbddc->local_mat);
4921: MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat);
4922: if (lA) {
4923: Mat work;
4924: MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work);
4925: PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work);
4926: MatDestroy(&work);
4927: }
4928: } else {
4929: Mat work_mat;
4931: MatDestroy(&pcbddc->local_mat);
4932: MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat);
4933: MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat);
4934: MatDestroy(&work_mat);
4935: if (lA) {
4936: Mat work;
4937: MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat);
4938: MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work);
4939: PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work);
4940: MatDestroy(&work);
4941: }
4942: }
4943: MatIsSymmetricKnown(matis->A, &isset, &issym);
4944: if (isset) MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym);
4945: MatDestroy(&new_mat);
4946: return 0;
4947: }
4949: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
4950: {
4951: PC_IS *pcis = (PC_IS *)(pc->data);
4952: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4953: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4954: PetscInt *idx_R_local = NULL;
4955: PetscInt n_vertices, i, j, n_R, n_D, n_B;
4956: PetscInt vbs, bs;
4957: PetscBT bitmask = NULL;
4959: /*
4960: No need to setup local scatters if
4961: - primal space is unchanged
4962: AND
4963: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
4964: AND
4965: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
4966: */
4967: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) return 0;
4968: /* destroy old objects */
4969: ISDestroy(&pcbddc->is_R_local);
4970: VecScatterDestroy(&pcbddc->R_to_B);
4971: VecScatterDestroy(&pcbddc->R_to_D);
4972: /* Set Non-overlapping dimensions */
4973: n_B = pcis->n_B;
4974: n_D = pcis->n - n_B;
4975: n_vertices = pcbddc->n_vertices;
4977: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
4979: /* create auxiliary bitmask and allocate workspace */
4980: if (!sub_schurs || !sub_schurs->reuse_solver) {
4981: PetscMalloc1(pcis->n - n_vertices, &idx_R_local);
4982: PetscBTCreate(pcis->n, &bitmask);
4983: for (i = 0; i < n_vertices; i++) PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]);
4985: for (i = 0, n_R = 0; i < pcis->n; i++) {
4986: if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
4987: }
4988: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
4989: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4991: ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local);
4992: ISGetLocalSize(reuse_solver->is_R, &n_R);
4993: }
4995: /* Block code */
4996: vbs = 1;
4997: MatGetBlockSize(pcbddc->local_mat, &bs);
4998: if (bs > 1 && !(n_vertices % bs)) {
4999: PetscBool is_blocked = PETSC_TRUE;
5000: PetscInt *vary;
5001: if (!sub_schurs || !sub_schurs->reuse_solver) {
5002: PetscMalloc1(pcis->n / bs, &vary);
5003: PetscArrayzero(vary, pcis->n / bs);
5004: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5005: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5006: for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5007: for (i = 0; i < pcis->n / bs; i++) {
5008: if (vary[i] != 0 && vary[i] != bs) {
5009: is_blocked = PETSC_FALSE;
5010: break;
5011: }
5012: }
5013: PetscFree(vary);
5014: } else {
5015: /* Verify directly the R set */
5016: for (i = 0; i < n_R / bs; i++) {
5017: PetscInt j, node = idx_R_local[bs * i];
5018: for (j = 1; j < bs; j++) {
5019: if (node != idx_R_local[bs * i + j] - j) {
5020: is_blocked = PETSC_FALSE;
5021: break;
5022: }
5023: }
5024: }
5025: }
5026: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5027: vbs = bs;
5028: for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5029: }
5030: }
5031: ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local);
5032: if (sub_schurs && sub_schurs->reuse_solver) {
5033: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5035: ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local);
5036: ISDestroy(&reuse_solver->is_R);
5037: PetscObjectReference((PetscObject)pcbddc->is_R_local);
5038: reuse_solver->is_R = pcbddc->is_R_local;
5039: } else {
5040: PetscFree(idx_R_local);
5041: }
5043: /* print some info if requested */
5044: if (pcbddc->dbg_flag) {
5045: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
5046: PetscViewerFlush(pcbddc->dbg_viewer);
5047: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5048: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank);
5049: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B);
5050: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5051: pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5052: PetscViewerFlush(pcbddc->dbg_viewer);
5053: }
5055: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5056: if (!sub_schurs || !sub_schurs->reuse_solver) {
5057: IS is_aux1, is_aux2;
5058: PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5060: ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local);
5061: PetscMalloc1(pcis->n_B - n_vertices, &aux_array1);
5062: PetscMalloc1(pcis->n_B - n_vertices, &aux_array2);
5063: ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices);
5064: for (i = 0; i < n_D; i++) PetscBTSet(bitmask, is_indices[i]);
5065: ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices);
5066: for (i = 0, j = 0; i < n_R; i++) {
5067: if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5068: }
5069: ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1);
5070: ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices);
5071: for (i = 0, j = 0; i < n_B; i++) {
5072: if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5073: }
5074: ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices);
5075: ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2);
5076: VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B);
5077: ISDestroy(&is_aux1);
5078: ISDestroy(&is_aux2);
5080: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5081: PetscMalloc1(n_D, &aux_array1);
5082: for (i = 0, j = 0; i < n_R; i++) {
5083: if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5084: }
5085: ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1);
5086: VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D);
5087: ISDestroy(&is_aux1);
5088: }
5089: PetscBTDestroy(&bitmask);
5090: ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local);
5091: } else {
5092: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5093: IS tis;
5094: PetscInt schur_size;
5096: ISGetLocalSize(reuse_solver->is_B, &schur_size);
5097: ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis);
5098: VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B);
5099: ISDestroy(&tis);
5100: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5101: ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis);
5102: VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D);
5103: ISDestroy(&tis);
5104: }
5105: }
5106: return 0;
5107: }
5109: static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5110: {
5111: MatNullSpace NullSpace;
5112: Mat dmat;
5113: const Vec *nullvecs;
5114: Vec v, v2, *nullvecs2;
5115: VecScatter sct = NULL;
5116: PetscContainer c;
5117: PetscScalar *ddata;
5118: PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs;
5119: PetscBool nnsp_has_cnst;
5121: if (!is && !B) { /* MATIS */
5122: Mat_IS *matis = (Mat_IS *)A->data;
5124: if (!B) MatISGetLocalMat(A, &B);
5125: sct = matis->cctx;
5126: PetscObjectReference((PetscObject)sct);
5127: } else {
5128: MatGetNullSpace(B, &NullSpace);
5129: if (!NullSpace) MatGetNearNullSpace(B, &NullSpace);
5130: if (NullSpace) return 0;
5131: }
5132: MatGetNullSpace(A, &NullSpace);
5133: if (!NullSpace) MatGetNearNullSpace(A, &NullSpace);
5134: if (!NullSpace) return 0;
5136: MatCreateVecs(A, &v, NULL);
5137: MatCreateVecs(B, &v2, NULL);
5138: if (!sct) VecScatterCreate(v, is, v2, NULL, &sct);
5139: MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs);
5140: bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5141: PetscMalloc1(bsiz, &nullvecs2);
5142: VecGetBlockSize(v2, &bs);
5143: VecGetSize(v2, &N);
5144: VecGetLocalSize(v2, &n);
5145: PetscMalloc1(n * bsiz, &ddata);
5146: for (k = 0; k < nnsp_size; k++) {
5147: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]);
5148: VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD);
5149: VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD);
5150: }
5151: if (nnsp_has_cnst) {
5152: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]);
5153: VecSet(nullvecs2[nnsp_size], 1.0);
5154: }
5155: PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2);
5156: MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace);
5158: MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat);
5159: PetscContainerCreate(PetscObjectComm((PetscObject)B), &c);
5160: PetscContainerSetPointer(c, ddata);
5161: PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault);
5162: PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c);
5163: PetscContainerDestroy(&c);
5164: PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat);
5165: MatDestroy(&dmat);
5167: for (k = 0; k < bsiz; k++) VecDestroy(&nullvecs2[k]);
5168: PetscFree(nullvecs2);
5169: MatSetNearNullSpace(B, NullSpace);
5170: MatNullSpaceDestroy(&NullSpace);
5171: VecDestroy(&v);
5172: VecDestroy(&v2);
5173: VecScatterDestroy(&sct);
5174: return 0;
5175: }
5177: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5178: {
5179: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5180: PC_IS *pcis = (PC_IS *)pc->data;
5181: PC pc_temp;
5182: Mat A_RR;
5183: MatNullSpace nnsp;
5184: MatReuse reuse;
5185: PetscScalar m_one = -1.0;
5186: PetscReal value;
5187: PetscInt n_D, n_R;
5188: PetscBool issbaij, opts, isset, issym;
5189: void (*f)(void) = NULL;
5190: char dir_prefix[256], neu_prefix[256], str_level[16];
5191: size_t len;
5193: PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0);
5194: /* approximate solver, propagate NearNullSpace if needed */
5195: if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5196: MatNullSpace gnnsp1, gnnsp2;
5197: PetscBool lhas, ghas;
5199: MatGetNearNullSpace(pcbddc->local_mat, &nnsp);
5200: MatGetNearNullSpace(pc->pmat, &gnnsp1);
5201: MatGetNullSpace(pc->pmat, &gnnsp2);
5202: lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5203: MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
5204: if (!ghas && (gnnsp1 || gnnsp2)) MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL);
5205: }
5207: /* compute prefixes */
5208: PetscStrcpy(dir_prefix, "");
5209: PetscStrcpy(neu_prefix, "");
5210: if (!pcbddc->current_level) {
5211: PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix));
5212: PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix));
5213: PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix));
5214: PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix));
5215: } else {
5216: PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level));
5217: PetscStrlen(((PetscObject)pc)->prefix, &len);
5218: len -= 15; /* remove "pc_bddc_coarse_" */
5219: if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */
5220: if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5221: /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5222: PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1);
5223: PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1);
5224: PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix));
5225: PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix));
5226: PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix));
5227: PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix));
5228: }
5230: /* DIRICHLET PROBLEM */
5231: if (dirichlet) {
5232: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5233: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5235: if (pcbddc->dbg_flag) {
5236: Mat A_IIn;
5238: PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn);
5239: MatDestroy(&pcis->A_II);
5240: pcis->A_II = A_IIn;
5241: }
5242: }
5243: MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym);
5244: if (isset) MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym);
5246: /* Matrix for Dirichlet problem is pcis->A_II */
5247: n_D = pcis->n - pcis->n_B;
5248: opts = PETSC_FALSE;
5249: if (!pcbddc->ksp_D) { /* create object if not yet build */
5250: opts = PETSC_TRUE;
5251: KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D);
5252: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1);
5253: /* default */
5254: KSPSetType(pcbddc->ksp_D, KSPPREONLY);
5255: KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix);
5256: PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij);
5257: KSPGetPC(pcbddc->ksp_D, &pc_temp);
5258: if (issbaij) {
5259: PCSetType(pc_temp, PCCHOLESKY);
5260: } else {
5261: PCSetType(pc_temp, PCLU);
5262: }
5263: KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure);
5264: }
5265: MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix);
5266: KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II);
5267: /* Allow user's customization */
5268: if (opts) KSPSetFromOptions(pcbddc->ksp_D);
5269: MatGetNearNullSpace(pcis->pA_II, &nnsp);
5270: if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5271: MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II);
5272: }
5273: MatGetNearNullSpace(pcis->pA_II, &nnsp);
5274: KSPGetPC(pcbddc->ksp_D, &pc_temp);
5275: PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f);
5276: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5277: PetscReal *coords = pcbddc->mat_graph->coords, *scoords;
5278: const PetscInt *idxs;
5279: PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d;
5281: ISGetLocalSize(pcis->is_I_local, &nl);
5282: ISGetIndices(pcis->is_I_local, &idxs);
5283: PetscMalloc1(nl * cdim, &scoords);
5284: for (i = 0; i < nl; i++) {
5285: for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5286: }
5287: ISRestoreIndices(pcis->is_I_local, &idxs);
5288: PCSetCoordinates(pc_temp, cdim, nl, scoords);
5289: PetscFree(scoords);
5290: }
5291: if (sub_schurs && sub_schurs->reuse_solver) {
5292: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5294: KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver);
5295: }
5297: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5298: if (!n_D) {
5299: KSPGetPC(pcbddc->ksp_D, &pc_temp);
5300: PCSetType(pc_temp, PCNONE);
5301: }
5302: KSPSetUp(pcbddc->ksp_D);
5303: /* set ksp_D into pcis data */
5304: PetscObjectReference((PetscObject)pcbddc->ksp_D);
5305: KSPDestroy(&pcis->ksp_D);
5306: pcis->ksp_D = pcbddc->ksp_D;
5307: }
5309: /* NEUMANN PROBLEM */
5310: A_RR = NULL;
5311: if (neumann) {
5312: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5313: PetscInt ibs, mbs;
5314: PetscBool issbaij, reuse_neumann_solver, isset, issym;
5315: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
5317: reuse_neumann_solver = PETSC_FALSE;
5318: if (sub_schurs && sub_schurs->reuse_solver) {
5319: IS iP;
5321: reuse_neumann_solver = PETSC_TRUE;
5322: PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP);
5323: if (iP) reuse_neumann_solver = PETSC_FALSE;
5324: }
5325: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5326: ISGetSize(pcbddc->is_R_local, &n_R);
5327: if (pcbddc->ksp_R) { /* already created ksp */
5328: PetscInt nn_R;
5329: KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR);
5330: PetscObjectReference((PetscObject)A_RR);
5331: MatGetSize(A_RR, &nn_R, NULL);
5332: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5333: KSPReset(pcbddc->ksp_R);
5334: MatDestroy(&A_RR);
5335: reuse = MAT_INITIAL_MATRIX;
5336: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5337: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5338: MatDestroy(&A_RR);
5339: reuse = MAT_INITIAL_MATRIX;
5340: } else { /* safe to reuse the matrix */
5341: reuse = MAT_REUSE_MATRIX;
5342: }
5343: }
5344: /* last check */
5345: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5346: MatDestroy(&A_RR);
5347: reuse = MAT_INITIAL_MATRIX;
5348: }
5349: } else { /* first time, so we need to create the matrix */
5350: reuse = MAT_INITIAL_MATRIX;
5351: }
5352: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5353: TODO: Get Rid of these conversions */
5354: MatGetBlockSize(pcbddc->local_mat, &mbs);
5355: ISGetBlockSize(pcbddc->is_R_local, &ibs);
5356: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij);
5357: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5358: if (matis->A == pcbddc->local_mat) {
5359: MatDestroy(&pcbddc->local_mat);
5360: MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat);
5361: } else {
5362: MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat);
5363: }
5364: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5365: if (matis->A == pcbddc->local_mat) {
5366: MatDestroy(&pcbddc->local_mat);
5367: MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat);
5368: } else {
5369: MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat);
5370: }
5371: }
5372: /* extract A_RR */
5373: if (reuse_neumann_solver) {
5374: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5376: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5377: MatDestroy(&A_RR);
5378: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5379: PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR);
5380: } else {
5381: MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR);
5382: }
5383: } else {
5384: MatDestroy(&A_RR);
5385: PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL);
5386: PetscObjectReference((PetscObject)A_RR);
5387: }
5388: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5389: MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR);
5390: }
5391: MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym);
5392: if (isset) MatSetOption(A_RR, MAT_SYMMETRIC, issym);
5393: opts = PETSC_FALSE;
5394: if (!pcbddc->ksp_R) { /* create object if not present */
5395: opts = PETSC_TRUE;
5396: KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R);
5397: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1);
5398: /* default */
5399: KSPSetType(pcbddc->ksp_R, KSPPREONLY);
5400: KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix);
5401: KSPGetPC(pcbddc->ksp_R, &pc_temp);
5402: PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij);
5403: if (issbaij) {
5404: PCSetType(pc_temp, PCCHOLESKY);
5405: } else {
5406: PCSetType(pc_temp, PCLU);
5407: }
5408: KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure);
5409: }
5410: KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR);
5411: MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix);
5412: if (opts) { /* Allow user's customization once */
5413: KSPSetFromOptions(pcbddc->ksp_R);
5414: }
5415: MatGetNearNullSpace(A_RR, &nnsp);
5416: if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5417: MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR);
5418: }
5419: MatGetNearNullSpace(A_RR, &nnsp);
5420: KSPGetPC(pcbddc->ksp_R, &pc_temp);
5421: PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f);
5422: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5423: PetscReal *coords = pcbddc->mat_graph->coords, *scoords;
5424: const PetscInt *idxs;
5425: PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d;
5427: ISGetLocalSize(pcbddc->is_R_local, &nl);
5428: ISGetIndices(pcbddc->is_R_local, &idxs);
5429: PetscMalloc1(nl * cdim, &scoords);
5430: for (i = 0; i < nl; i++) {
5431: for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5432: }
5433: ISRestoreIndices(pcbddc->is_R_local, &idxs);
5434: PCSetCoordinates(pc_temp, cdim, nl, scoords);
5435: PetscFree(scoords);
5436: }
5438: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5439: if (!n_R) {
5440: KSPGetPC(pcbddc->ksp_R, &pc_temp);
5441: PCSetType(pc_temp, PCNONE);
5442: }
5443: /* Reuse solver if it is present */
5444: if (reuse_neumann_solver) {
5445: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5447: KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver);
5448: }
5449: KSPSetUp(pcbddc->ksp_R);
5450: }
5452: if (pcbddc->dbg_flag) {
5453: PetscViewerFlush(pcbddc->dbg_viewer);
5454: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5455: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
5456: }
5457: PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0);
5459: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5460: if (pcbddc->NullSpace_corr[0]) PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE);
5461: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]);
5462: if (neumann && pcbddc->NullSpace_corr[2]) PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]);
5463: /* check Dirichlet and Neumann solvers */
5464: if (pcbddc->dbg_flag) {
5465: if (dirichlet) { /* Dirichlet */
5466: VecSetRandom(pcis->vec1_D, NULL);
5467: MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D);
5468: KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D);
5469: KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D);
5470: VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D);
5471: VecNorm(pcis->vec1_D, NORM_INFINITY, &value);
5472: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_D))->prefix, (double)value);
5473: PetscViewerFlush(pcbddc->dbg_viewer);
5474: }
5475: if (neumann) { /* Neumann */
5476: VecSetRandom(pcbddc->vec1_R, NULL);
5477: MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R);
5478: KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R);
5479: KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R);
5480: VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R);
5481: VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value);
5482: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_R))->prefix, (double)value);
5483: PetscViewerFlush(pcbddc->dbg_viewer);
5484: }
5485: }
5486: /* free Neumann problem's matrix */
5487: MatDestroy(&A_RR);
5488: return 0;
5489: }
5491: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5492: {
5493: PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);
5494: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5495: PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
5497: if (!reuse_solver) VecSet(pcbddc->vec1_R, 0.);
5498: if (!pcbddc->switch_static) {
5499: if (applytranspose && pcbddc->local_auxmat1) {
5500: MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C);
5501: MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B);
5502: }
5503: if (!reuse_solver) {
5504: VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5505: VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5506: } else {
5507: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5509: VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD);
5510: VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD);
5511: }
5512: } else {
5513: VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5514: VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5515: VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5516: VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5517: if (applytranspose && pcbddc->local_auxmat1) {
5518: MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C);
5519: MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B);
5520: VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5521: VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5522: }
5523: }
5524: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0);
5525: if (!reuse_solver || pcbddc->switch_static) {
5526: if (applytranspose) {
5527: KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R);
5528: } else {
5529: KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R);
5530: }
5531: KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R);
5532: } else {
5533: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5535: if (applytranspose) {
5536: MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B);
5537: } else {
5538: MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B);
5539: }
5540: }
5541: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0);
5542: VecSet(inout_B, 0.);
5543: if (!pcbddc->switch_static) {
5544: if (!reuse_solver) {
5545: VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5546: VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5547: } else {
5548: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5550: VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE);
5551: VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE);
5552: }
5553: if (!applytranspose && pcbddc->local_auxmat1) {
5554: MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C);
5555: MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B);
5556: }
5557: } else {
5558: VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5559: VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5560: VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD);
5561: VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD);
5562: if (!applytranspose && pcbddc->local_auxmat1) {
5563: MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C);
5564: MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R);
5565: }
5566: VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5567: VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5568: VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD);
5569: VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD);
5570: }
5571: return 0;
5572: }
5574: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5575: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5576: {
5577: PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);
5578: PC_IS *pcis = (PC_IS *)(pc->data);
5579: const PetscScalar zero = 0.0;
5581: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5582: if (!pcbddc->benign_apply_coarse_only) {
5583: if (applytranspose) {
5584: MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P);
5585: if (pcbddc->switch_static) MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P);
5586: } else {
5587: MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P);
5588: if (pcbddc->switch_static) MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P);
5589: }
5590: } else {
5591: VecSet(pcbddc->vec1_P, zero);
5592: }
5594: /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5595: if (pcbddc->benign_n) {
5596: PetscScalar *array;
5597: PetscInt j;
5599: VecGetArray(pcbddc->vec1_P, &array);
5600: for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
5601: VecRestoreArray(pcbddc->vec1_P, &array);
5602: }
5604: /* start communications from local primal nodes to rhs of coarse solver */
5605: VecSet(pcbddc->coarse_vec, zero);
5606: PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD);
5607: PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD);
5609: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5610: if (pcbddc->coarse_ksp) {
5611: Mat coarse_mat;
5612: Vec rhs, sol;
5613: MatNullSpace nullsp;
5614: PetscBool isbddc = PETSC_FALSE;
5616: if (pcbddc->benign_have_null) {
5617: PC coarse_pc;
5619: KSPGetPC(pcbddc->coarse_ksp, &coarse_pc);
5620: PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc);
5621: /* we need to propagate to coarser levels the need for a possible benign correction */
5622: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5623: PC_BDDC *coarsepcbddc = (PC_BDDC *)(coarse_pc->data);
5624: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5625: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5626: }
5627: }
5628: KSPGetRhs(pcbddc->coarse_ksp, &rhs);
5629: KSPGetSolution(pcbddc->coarse_ksp, &sol);
5630: KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL);
5631: if (applytranspose) {
5633: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0);
5634: KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol);
5635: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0);
5636: KSPCheckSolve(pcbddc->coarse_ksp, pc, sol);
5637: MatGetTransposeNullSpace(coarse_mat, &nullsp);
5638: if (nullsp) MatNullSpaceRemove(nullsp, sol);
5639: } else {
5640: MatGetNullSpace(coarse_mat, &nullsp);
5641: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5642: PC coarse_pc;
5644: if (nullsp) MatNullSpaceRemove(nullsp, rhs);
5645: KSPGetPC(pcbddc->coarse_ksp, &coarse_pc);
5646: PCPreSolve(coarse_pc, pcbddc->coarse_ksp);
5647: PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol);
5648: PCPostSolve(coarse_pc, pcbddc->coarse_ksp);
5649: } else {
5650: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0);
5651: KSPSolve(pcbddc->coarse_ksp, rhs, sol);
5652: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0);
5653: KSPCheckSolve(pcbddc->coarse_ksp, pc, sol);
5654: if (nullsp) MatNullSpaceRemove(nullsp, sol);
5655: }
5656: }
5657: /* we don't need the benign correction at coarser levels anymore */
5658: if (pcbddc->benign_have_null && isbddc) {
5659: PC coarse_pc;
5660: PC_BDDC *coarsepcbddc;
5662: KSPGetPC(pcbddc->coarse_ksp, &coarse_pc);
5663: coarsepcbddc = (PC_BDDC *)(coarse_pc->data);
5664: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5665: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5666: }
5667: }
5669: /* Local solution on R nodes */
5670: if (pcis->n && !pcbddc->benign_apply_coarse_only) PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose);
5671: /* communications from coarse sol to local primal nodes */
5672: PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE);
5673: PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE);
5675: /* Sum contributions from the two levels */
5676: if (!pcbddc->benign_apply_coarse_only) {
5677: if (applytranspose) {
5678: MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B);
5679: if (pcbddc->switch_static) MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D);
5680: } else {
5681: MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B);
5682: if (pcbddc->switch_static) MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D);
5683: }
5684: /* store p0 */
5685: if (pcbddc->benign_n) {
5686: PetscScalar *array;
5687: PetscInt j;
5689: VecGetArray(pcbddc->vec1_P, &array);
5690: for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
5691: VecRestoreArray(pcbddc->vec1_P, &array);
5692: }
5693: } else { /* expand the coarse solution */
5694: if (applytranspose) {
5695: MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B);
5696: } else {
5697: MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B);
5698: }
5699: }
5700: return 0;
5701: }
5703: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
5704: {
5705: PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);
5706: Vec from, to;
5707: const PetscScalar *array;
5709: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5710: from = pcbddc->coarse_vec;
5711: to = pcbddc->vec1_P;
5712: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5713: Vec tvec;
5715: KSPGetRhs(pcbddc->coarse_ksp, &tvec);
5716: VecResetArray(tvec);
5717: KSPGetSolution(pcbddc->coarse_ksp, &tvec);
5718: VecGetArrayRead(tvec, &array);
5719: VecPlaceArray(from, array);
5720: VecRestoreArrayRead(tvec, &array);
5721: }
5722: } else { /* from local to global -> put data in coarse right hand side */
5723: from = pcbddc->vec1_P;
5724: to = pcbddc->coarse_vec;
5725: }
5726: VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode);
5727: return 0;
5728: }
5730: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5731: {
5732: PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);
5733: Vec from, to;
5734: const PetscScalar *array;
5736: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5737: from = pcbddc->coarse_vec;
5738: to = pcbddc->vec1_P;
5739: } else { /* from local to global -> put data in coarse right hand side */
5740: from = pcbddc->vec1_P;
5741: to = pcbddc->coarse_vec;
5742: }
5743: VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode);
5744: if (smode == SCATTER_FORWARD) {
5745: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5746: Vec tvec;
5748: KSPGetRhs(pcbddc->coarse_ksp, &tvec);
5749: VecGetArrayRead(to, &array);
5750: VecPlaceArray(tvec, array);
5751: VecRestoreArrayRead(to, &array);
5752: }
5753: } else {
5754: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5755: VecResetArray(from);
5756: }
5757: }
5758: return 0;
5759: }
5761: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5762: {
5763: PC_IS *pcis = (PC_IS *)(pc->data);
5764: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5765: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
5766: /* one and zero */
5767: PetscScalar one = 1.0, zero = 0.0;
5768: /* space to store constraints and their local indices */
5769: PetscScalar *constraints_data;
5770: PetscInt *constraints_idxs, *constraints_idxs_B;
5771: PetscInt *constraints_idxs_ptr, *constraints_data_ptr;
5772: PetscInt *constraints_n;
5773: /* iterators */
5774: PetscInt i, j, k, total_counts, total_counts_cc, cum;
5775: /* BLAS integers */
5776: PetscBLASInt lwork, lierr;
5777: PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
5778: PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
5779: /* reuse */
5780: PetscInt olocal_primal_size, olocal_primal_size_cc;
5781: PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
5782: /* change of basis */
5783: PetscBool qr_needed;
5784: PetscBT change_basis, qr_needed_idx;
5785: /* auxiliary stuff */
5786: PetscInt *nnz, *is_indices;
5787: PetscInt ncc;
5788: /* some quantities */
5789: PetscInt n_vertices, total_primal_vertices, valid_constraints;
5790: PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
5791: PetscReal tol; /* tolerance for retaining eigenmodes */
5793: tol = PetscSqrtReal(PETSC_SMALL);
5794: /* Destroy Mat objects computed previously */
5795: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5796: MatDestroy(&pcbddc->ConstraintMatrix);
5797: MatDestroy(&pcbddc->switch_static_change);
5798: /* save info on constraints from previous setup (if any) */
5799: olocal_primal_size = pcbddc->local_primal_size;
5800: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5801: PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult);
5802: PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc);
5803: PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc);
5804: PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult);
5805: PetscFree(pcbddc->primal_indices_local_idxs);
5807: if (!pcbddc->adaptive_selection) {
5808: IS ISForVertices, *ISForFaces, *ISForEdges;
5809: MatNullSpace nearnullsp;
5810: const Vec *nearnullvecs;
5811: Vec *localnearnullsp;
5812: PetscScalar *array;
5813: PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
5814: PetscBool nnsp_has_cnst;
5815: /* LAPACK working arrays for SVD or POD */
5816: PetscBool skip_lapack, boolforchange;
5817: PetscScalar *work;
5818: PetscReal *singular_vals;
5819: #if defined(PETSC_USE_COMPLEX)
5820: PetscReal *rwork;
5821: #endif
5822: PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
5823: PetscBLASInt dummy_int = 1;
5824: PetscScalar dummy_scalar = 1.;
5825: PetscBool use_pod = PETSC_FALSE;
5827: /* MKL SVD with same input gives different results on different processes! */
5828: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
5829: use_pod = PETSC_TRUE;
5830: #endif
5831: /* Get index sets for faces, edges and vertices from graph */
5832: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices);
5833: o_nf = n_ISForFaces;
5834: o_ne = n_ISForEdges;
5835: n_vertices = 0;
5836: if (ISForVertices) ISGetSize(ISForVertices, &n_vertices);
5837: /* print some info */
5838: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5839: if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
5840: PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer);
5841: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5842: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n");
5843: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices);
5844: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges);
5845: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces);
5846: PetscViewerFlush(pcbddc->dbg_viewer);
5847: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
5848: }
5850: if (!pcbddc->use_vertices) n_vertices = 0;
5851: if (!pcbddc->use_edges) n_ISForEdges = 0;
5852: if (!pcbddc->use_faces) n_ISForFaces = 0;
5854: /* check if near null space is attached to global mat */
5855: if (pcbddc->use_nnsp) {
5856: MatGetNearNullSpace(pc->pmat, &nearnullsp);
5857: } else nearnullsp = NULL;
5859: if (nearnullsp) {
5860: MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs);
5861: /* remove any stored info */
5862: MatNullSpaceDestroy(&pcbddc->onearnullspace);
5863: PetscFree(pcbddc->onearnullvecs_state);
5864: /* store information for BDDC solver reuse */
5865: PetscObjectReference((PetscObject)nearnullsp);
5866: pcbddc->onearnullspace = nearnullsp;
5867: PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state);
5868: for (i = 0; i < nnsp_size; i++) PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]);
5869: } else { /* if near null space is not provided BDDC uses constants by default */
5870: nnsp_size = 0;
5871: nnsp_has_cnst = PETSC_TRUE;
5872: }
5873: /* get max number of constraints on a single cc */
5874: max_constraints = nnsp_size;
5875: if (nnsp_has_cnst) max_constraints++;
5877: /*
5878: Evaluate maximum storage size needed by the procedure
5879: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5880: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5881: There can be multiple constraints per connected component
5882: */
5883: ncc = n_vertices + n_ISForFaces + n_ISForEdges;
5884: PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n);
5886: total_counts = n_ISForFaces + n_ISForEdges;
5887: total_counts *= max_constraints;
5888: total_counts += n_vertices;
5889: PetscBTCreate(total_counts, &change_basis);
5891: total_counts = 0;
5892: max_size_of_constraint = 0;
5893: for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
5894: IS used_is;
5895: if (i < n_ISForEdges) {
5896: used_is = ISForEdges[i];
5897: } else {
5898: used_is = ISForFaces[i - n_ISForEdges];
5899: }
5900: ISGetSize(used_is, &j);
5901: total_counts += j;
5902: max_size_of_constraint = PetscMax(j, max_size_of_constraint);
5903: }
5904: PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B);
5906: /* get local part of global near null space vectors */
5907: PetscMalloc1(nnsp_size, &localnearnullsp);
5908: for (k = 0; k < nnsp_size; k++) {
5909: VecDuplicate(pcis->vec1_N, &localnearnullsp[k]);
5910: VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD);
5911: VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD);
5912: }
5914: /* whether or not to skip lapack calls */
5915: skip_lapack = PETSC_TRUE;
5916: if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
5918: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5919: if (!skip_lapack) {
5920: PetscScalar temp_work;
5922: if (use_pod) {
5923: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5924: PetscMalloc1(max_constraints * max_constraints, &correlation_mat);
5925: PetscMalloc1(max_constraints, &singular_vals);
5926: PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis);
5927: #if defined(PETSC_USE_COMPLEX)
5928: PetscMalloc1(3 * max_constraints, &rwork);
5929: #endif
5930: /* now we evaluate the optimal workspace using query with lwork=-1 */
5931: PetscBLASIntCast(max_constraints, &Blas_N);
5932: PetscBLASIntCast(max_constraints, &Blas_LDA);
5933: lwork = -1;
5934: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5935: #if !defined(PETSC_USE_COMPLEX)
5936: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
5937: #else
5938: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
5939: #endif
5940: PetscFPTrapPop();
5942: } else {
5943: #if !defined(PETSC_MISSING_LAPACK_GESVD)
5944: /* SVD */
5945: PetscInt max_n, min_n;
5946: max_n = max_size_of_constraint;
5947: min_n = max_constraints;
5948: if (max_size_of_constraint < max_constraints) {
5949: min_n = max_size_of_constraint;
5950: max_n = max_constraints;
5951: }
5952: PetscMalloc1(min_n, &singular_vals);
5953: #if defined(PETSC_USE_COMPLEX)
5954: PetscMalloc1(5 * min_n, &rwork);
5955: #endif
5956: /* now we evaluate the optimal workspace using query with lwork=-1 */
5957: lwork = -1;
5958: PetscBLASIntCast(max_n, &Blas_M);
5959: PetscBLASIntCast(min_n, &Blas_N);
5960: PetscBLASIntCast(max_n, &Blas_LDA);
5961: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5962: #if !defined(PETSC_USE_COMPLEX)
5963: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
5964: #else
5965: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
5966: #endif
5967: PetscFPTrapPop();
5969: #else
5970: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
5971: #endif /* on missing GESVD */
5972: }
5973: /* Allocate optimal workspace */
5974: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork);
5975: PetscMalloc1(lwork, &work);
5976: }
5977: /* Now we can loop on constraining sets */
5978: total_counts = 0;
5979: constraints_idxs_ptr[0] = 0;
5980: constraints_data_ptr[0] = 0;
5981: /* vertices */
5982: if (n_vertices) {
5983: ISGetIndices(ISForVertices, (const PetscInt **)&is_indices);
5984: PetscArraycpy(constraints_idxs, is_indices, n_vertices);
5985: for (i = 0; i < n_vertices; i++) {
5986: constraints_n[total_counts] = 1;
5987: constraints_data[total_counts] = 1.0;
5988: constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
5989: constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
5990: total_counts++;
5991: }
5992: ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices);
5993: }
5995: /* edges and faces */
5996: total_counts_cc = total_counts;
5997: for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
5998: IS used_is;
5999: PetscBool idxs_copied = PETSC_FALSE;
6001: if (ncc < n_ISForEdges) {
6002: used_is = ISForEdges[ncc];
6003: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6004: } else {
6005: used_is = ISForFaces[ncc - n_ISForEdges];
6006: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6007: }
6008: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6010: ISGetSize(used_is, &size_of_constraint);
6011: if (!size_of_constraint) continue;
6012: ISGetIndices(used_is, (const PetscInt **)&is_indices);
6013: /* change of basis should not be performed on local periodic nodes */
6014: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6015: if (nnsp_has_cnst) {
6016: PetscScalar quad_value;
6018: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint);
6019: idxs_copied = PETSC_TRUE;
6021: if (!pcbddc->use_nnsp_true) {
6022: quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6023: } else {
6024: quad_value = 1.0;
6025: }
6026: for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6027: temp_constraints++;
6028: total_counts++;
6029: }
6030: for (k = 0; k < nnsp_size; k++) {
6031: PetscReal real_value;
6032: PetscScalar *ptr_to_data;
6034: VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array);
6035: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6036: for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6037: VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array);
6038: /* check if array is null on the connected component */
6039: PetscBLASIntCast(size_of_constraint, &Blas_N);
6040: PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6041: if (real_value > tol * size_of_constraint) { /* keep indices and values */
6042: temp_constraints++;
6043: total_counts++;
6044: if (!idxs_copied) {
6045: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint);
6046: idxs_copied = PETSC_TRUE;
6047: }
6048: }
6049: }
6050: ISRestoreIndices(used_is, (const PetscInt **)&is_indices);
6051: valid_constraints = temp_constraints;
6052: if (!pcbddc->use_nnsp_true && temp_constraints) {
6053: if (temp_constraints == 1) { /* just normalize the constraint */
6054: PetscScalar norm, *ptr_to_data;
6056: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6057: PetscBLASIntCast(size_of_constraint, &Blas_N);
6058: PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6059: norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6060: PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6061: } else { /* perform SVD */
6062: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6064: if (use_pod) {
6065: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6066: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6067: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6068: the constraints basis will differ (by a complex factor with absolute value equal to 1)
6069: from that computed using LAPACKgesvd
6070: -> This is due to a different computation of eigenvectors in LAPACKheev
6071: -> The quality of the POD-computed basis will be the same */
6072: PetscArrayzero(correlation_mat, temp_constraints * temp_constraints);
6073: /* Store upper triangular part of correlation matrix */
6074: PetscBLASIntCast(size_of_constraint, &Blas_N);
6075: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6076: for (j = 0; j < temp_constraints; j++) {
6077: for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6078: }
6079: /* compute eigenvalues and eigenvectors of correlation matrix */
6080: PetscBLASIntCast(temp_constraints, &Blas_N);
6081: PetscBLASIntCast(temp_constraints, &Blas_LDA);
6082: #if !defined(PETSC_USE_COMPLEX)
6083: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6084: #else
6085: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6086: #endif
6087: PetscFPTrapPop();
6089: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6090: j = 0;
6091: while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6092: total_counts = total_counts - j;
6093: valid_constraints = temp_constraints - j;
6094: /* scale and copy POD basis into used quadrature memory */
6095: PetscBLASIntCast(size_of_constraint, &Blas_M);
6096: PetscBLASIntCast(temp_constraints, &Blas_N);
6097: PetscBLASIntCast(temp_constraints, &Blas_K);
6098: PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6099: PetscBLASIntCast(temp_constraints, &Blas_LDB);
6100: PetscBLASIntCast(size_of_constraint, &Blas_LDC);
6101: if (j < temp_constraints) {
6102: PetscInt ii;
6103: for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6104: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6105: PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6106: PetscFPTrapPop();
6107: for (k = 0; k < temp_constraints - j; k++) {
6108: for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6109: }
6110: }
6111: } else {
6112: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6113: PetscBLASIntCast(size_of_constraint, &Blas_M);
6114: PetscBLASIntCast(temp_constraints, &Blas_N);
6115: PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6116: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6117: #if !defined(PETSC_USE_COMPLEX)
6118: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6119: #else
6120: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6121: #endif
6123: PetscFPTrapPop();
6124: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6125: k = temp_constraints;
6126: if (k > size_of_constraint) k = size_of_constraint;
6127: j = 0;
6128: while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6129: valid_constraints = k - j;
6130: total_counts = total_counts - temp_constraints + valid_constraints;
6131: #else
6132: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6133: #endif /* on missing GESVD */
6134: }
6135: }
6136: }
6137: /* update pointers information */
6138: if (valid_constraints) {
6139: constraints_n[total_counts_cc] = valid_constraints;
6140: constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6141: constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6142: /* set change_of_basis flag */
6143: if (boolforchange) PetscBTSet(change_basis, total_counts_cc);
6144: total_counts_cc++;
6145: }
6146: }
6147: /* free workspace */
6148: if (!skip_lapack) {
6149: PetscFree(work);
6150: #if defined(PETSC_USE_COMPLEX)
6151: PetscFree(rwork);
6152: #endif
6153: PetscFree(singular_vals);
6154: PetscFree(correlation_mat);
6155: PetscFree(temp_basis);
6156: }
6157: for (k = 0; k < nnsp_size; k++) VecDestroy(&localnearnullsp[k]);
6158: PetscFree(localnearnullsp);
6159: /* free index sets of faces, edges and vertices */
6160: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices);
6161: } else {
6162: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6164: total_counts = 0;
6165: n_vertices = 0;
6166: if (sub_schurs->is_vertices && pcbddc->use_vertices) ISGetLocalSize(sub_schurs->is_vertices, &n_vertices);
6167: max_constraints = 0;
6168: total_counts_cc = 0;
6169: for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6170: total_counts += pcbddc->adaptive_constraints_n[i];
6171: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6172: max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6173: }
6174: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6175: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6176: constraints_idxs = pcbddc->adaptive_constraints_idxs;
6177: constraints_data = pcbddc->adaptive_constraints_data;
6178: /* constraints_n differs from pcbddc->adaptive_constraints_n */
6179: PetscMalloc1(total_counts_cc, &constraints_n);
6180: total_counts_cc = 0;
6181: for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6182: if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6183: }
6185: max_size_of_constraint = 0;
6186: for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6187: PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B);
6188: /* Change of basis */
6189: PetscBTCreate(total_counts_cc, &change_basis);
6190: if (pcbddc->use_change_of_basis) {
6191: for (i = 0; i < sub_schurs->n_subs; i++) {
6192: if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscBTSet(change_basis, i + n_vertices);
6193: }
6194: }
6195: }
6196: pcbddc->local_primal_size = total_counts;
6197: PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs);
6199: /* map constraints_idxs in boundary numbering */
6200: if (pcbddc->use_change_of_basis) {
6201: ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B);
6203: }
6205: /* Create constraint matrix */
6206: MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix);
6207: MatSetType(pcbddc->ConstraintMatrix, MATAIJ);
6208: MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n);
6210: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6211: /* determine if a QR strategy is needed for change of basis */
6212: qr_needed = pcbddc->use_qr_single;
6213: PetscBTCreate(total_counts_cc, &qr_needed_idx);
6214: total_primal_vertices = 0;
6215: pcbddc->local_primal_size_cc = 0;
6216: for (i = 0; i < total_counts_cc; i++) {
6217: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6218: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6219: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6220: pcbddc->local_primal_size_cc += 1;
6221: } else if (PetscBTLookup(change_basis, i)) {
6222: for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6223: pcbddc->local_primal_size_cc += constraints_n[i];
6224: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6225: PetscBTSet(qr_needed_idx, i);
6226: qr_needed = PETSC_TRUE;
6227: }
6228: } else {
6229: pcbddc->local_primal_size_cc += 1;
6230: }
6231: }
6232: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6233: pcbddc->n_vertices = total_primal_vertices;
6234: /* permute indices in order to have a sorted set of vertices */
6235: PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs);
6236: PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult);
6237: PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices);
6238: for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6240: /* nonzero structure of constraint matrix */
6241: /* and get reference dof for local constraints */
6242: PetscMalloc1(pcbddc->local_primal_size, &nnz);
6243: for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
6245: j = total_primal_vertices;
6246: total_counts = total_primal_vertices;
6247: cum = total_primal_vertices;
6248: for (i = n_vertices; i < total_counts_cc; i++) {
6249: if (!PetscBTLookup(change_basis, i)) {
6250: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6251: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6252: cum++;
6253: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6254: for (k = 0; k < constraints_n[i]; k++) {
6255: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6256: nnz[j + k] = size_of_constraint;
6257: }
6258: j += constraints_n[i];
6259: }
6260: }
6261: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz);
6262: MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE);
6263: PetscFree(nnz);
6265: /* set values in constraint matrix */
6266: for (i = 0; i < total_primal_vertices; i++) MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES);
6267: total_counts = total_primal_vertices;
6268: for (i = n_vertices; i < total_counts_cc; i++) {
6269: if (!PetscBTLookup(change_basis, i)) {
6270: PetscInt *cols;
6272: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6273: cols = constraints_idxs + constraints_idxs_ptr[i];
6274: for (k = 0; k < constraints_n[i]; k++) {
6275: PetscInt row = total_counts + k;
6276: PetscScalar *vals;
6278: vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6279: MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES);
6280: }
6281: total_counts += constraints_n[i];
6282: }
6283: }
6284: /* assembling */
6285: MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY);
6286: MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY);
6287: MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view");
6289: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6290: if (pcbddc->use_change_of_basis) {
6291: /* dual and primal dofs on a single cc */
6292: PetscInt dual_dofs, primal_dofs;
6293: /* working stuff for GEQRF */
6294: PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6295: PetscBLASInt lqr_work;
6296: /* working stuff for UNGQR */
6297: PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6298: PetscBLASInt lgqr_work;
6299: /* working stuff for TRTRS */
6300: PetscScalar *trs_rhs = NULL;
6301: PetscBLASInt Blas_NRHS;
6302: /* pointers for values insertion into change of basis matrix */
6303: PetscInt *start_rows, *start_cols;
6304: PetscScalar *start_vals;
6305: /* working stuff for values insertion */
6306: PetscBT is_primal;
6307: PetscInt *aux_primal_numbering_B;
6308: /* matrix sizes */
6309: PetscInt global_size, local_size;
6310: /* temporary change of basis */
6311: Mat localChangeOfBasisMatrix;
6312: /* extra space for debugging */
6313: PetscScalar *dbg_work = NULL;
6315: MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix);
6316: MatSetType(localChangeOfBasisMatrix, MATAIJ);
6317: MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n);
6318: /* nonzeros for local mat */
6319: PetscMalloc1(pcis->n, &nnz);
6320: if (!pcbddc->benign_change || pcbddc->fake_change) {
6321: for (i = 0; i < pcis->n; i++) nnz[i] = 1;
6322: } else {
6323: const PetscInt *ii;
6324: PetscInt n;
6325: PetscBool flg_row;
6326: MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row);
6327: for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
6328: MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row);
6329: }
6330: for (i = n_vertices; i < total_counts_cc; i++) {
6331: if (PetscBTLookup(change_basis, i)) {
6332: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6333: if (PetscBTLookup(qr_needed_idx, i)) {
6334: for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
6335: } else {
6336: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6337: for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
6338: }
6339: }
6340: }
6341: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz);
6342: MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE);
6343: PetscFree(nnz);
6344: /* Set interior change in the matrix */
6345: if (!pcbddc->benign_change || pcbddc->fake_change) {
6346: for (i = 0; i < pcis->n; i++) MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES);
6347: } else {
6348: const PetscInt *ii, *jj;
6349: PetscScalar *aa;
6350: PetscInt n;
6351: PetscBool flg_row;
6352: MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row);
6353: MatSeqAIJGetArray(pcbddc->benign_change, &aa);
6354: for (i = 0; i < n; i++) MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES);
6355: MatSeqAIJRestoreArray(pcbddc->benign_change, &aa);
6356: MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row);
6357: }
6359: if (pcbddc->dbg_flag) {
6360: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n");
6361: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank);
6362: }
6364: /* Now we loop on the constraints which need a change of basis */
6365: /*
6366: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6367: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6369: Basic blocks of change of basis matrix T computed:
6371: - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6373: | 1 0 ... 0 s_1/S |
6374: | 0 1 ... 0 s_2/S |
6375: | ... |
6376: | 0 ... 1 s_{n-1}/S |
6377: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
6379: with S = \sum_{i=1}^n s_i^2
6380: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6381: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6383: - QR decomposition of constraints otherwise
6384: */
6385: if (qr_needed && max_size_of_constraint) {
6386: /* space to store Q */
6387: PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis);
6388: /* array to store scaling factors for reflectors */
6389: PetscMalloc1(max_constraints, &qr_tau);
6390: /* first we issue queries for optimal work */
6391: PetscBLASIntCast(max_size_of_constraint, &Blas_M);
6392: PetscBLASIntCast(max_constraints, &Blas_N);
6393: PetscBLASIntCast(max_size_of_constraint, &Blas_LDA);
6394: lqr_work = -1;
6395: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
6397: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work);
6398: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t), &qr_work);
6399: lgqr_work = -1;
6400: PetscBLASIntCast(max_size_of_constraint, &Blas_M);
6401: PetscBLASIntCast(max_size_of_constraint, &Blas_N);
6402: PetscBLASIntCast(max_constraints, &Blas_K);
6403: PetscBLASIntCast(max_size_of_constraint, &Blas_LDA);
6404: if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
6405: PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
6407: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work);
6408: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t), &gqr_work);
6409: /* array to store rhs and solution of triangular solver */
6410: PetscMalloc1(max_constraints * max_constraints, &trs_rhs);
6411: /* allocating workspace for check */
6412: if (pcbddc->dbg_flag) PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work);
6413: }
6414: /* array to store whether a node is primal or not */
6415: PetscBTCreate(pcis->n_B, &is_primal);
6416: PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B);
6417: ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B);
6419: for (i = 0; i < total_primal_vertices; i++) PetscBTSet(is_primal, aux_primal_numbering_B[i]);
6420: PetscFree(aux_primal_numbering_B);
6422: /* loop on constraints and see whether or not they need a change of basis and compute it */
6423: for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
6424: size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
6425: if (PetscBTLookup(change_basis, total_counts)) {
6426: /* get constraint info */
6427: primal_dofs = constraints_n[total_counts];
6428: dual_dofs = size_of_constraint - primal_dofs;
6430: if (pcbddc->dbg_flag) PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint);
6432: if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
6434: /* copy quadrature constraints for change of basis check */
6435: if (pcbddc->dbg_flag) PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs);
6436: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6437: PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs);
6439: /* compute QR decomposition of constraints */
6440: PetscBLASIntCast(size_of_constraint, &Blas_M);
6441: PetscBLASIntCast(primal_dofs, &Blas_N);
6442: PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6443: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6444: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
6446: PetscFPTrapPop();
6448: /* explicitly compute R^-T */
6449: PetscArrayzero(trs_rhs, primal_dofs * primal_dofs);
6450: for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
6451: PetscBLASIntCast(primal_dofs, &Blas_N);
6452: PetscBLASIntCast(primal_dofs, &Blas_NRHS);
6453: PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6454: PetscBLASIntCast(primal_dofs, &Blas_LDB);
6455: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6456: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
6458: PetscFPTrapPop();
6460: /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
6461: PetscBLASIntCast(size_of_constraint, &Blas_M);
6462: PetscBLASIntCast(size_of_constraint, &Blas_N);
6463: PetscBLASIntCast(primal_dofs, &Blas_K);
6464: PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6465: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6466: PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
6468: PetscFPTrapPop();
6470: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6471: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6472: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6473: PetscBLASIntCast(size_of_constraint, &Blas_M);
6474: PetscBLASIntCast(primal_dofs, &Blas_N);
6475: PetscBLASIntCast(primal_dofs, &Blas_K);
6476: PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6477: PetscBLASIntCast(primal_dofs, &Blas_LDB);
6478: PetscBLASIntCast(size_of_constraint, &Blas_LDC);
6479: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6480: PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
6481: PetscFPTrapPop();
6482: PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs);
6484: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6485: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6486: /* insert cols for primal dofs */
6487: for (j = 0; j < primal_dofs; j++) {
6488: start_vals = &qr_basis[j * size_of_constraint];
6489: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
6490: MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES);
6491: }
6492: /* insert cols for dual dofs */
6493: for (j = 0, k = 0; j < dual_dofs; k++) {
6494: if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
6495: start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
6496: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
6497: MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES);
6498: j++;
6499: }
6500: }
6502: /* check change of basis */
6503: if (pcbddc->dbg_flag) {
6504: PetscInt ii, jj;
6505: PetscBool valid_qr = PETSC_TRUE;
6506: PetscBLASIntCast(primal_dofs, &Blas_M);
6507: PetscBLASIntCast(size_of_constraint, &Blas_N);
6508: PetscBLASIntCast(size_of_constraint, &Blas_K);
6509: PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6510: PetscBLASIntCast(size_of_constraint, &Blas_LDB);
6511: PetscBLASIntCast(primal_dofs, &Blas_LDC);
6512: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6513: PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
6514: PetscFPTrapPop();
6515: for (jj = 0; jj < size_of_constraint; jj++) {
6516: for (ii = 0; ii < primal_dofs; ii++) {
6517: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6518: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6519: }
6520: }
6521: if (!valid_qr) {
6522: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n");
6523: for (jj = 0; jj < size_of_constraint; jj++) {
6524: for (ii = 0; ii < primal_dofs; ii++) {
6525: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
6526: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]));
6527: }
6528: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
6529: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]));
6530: }
6531: }
6532: }
6533: } else {
6534: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n");
6535: }
6536: }
6537: } else { /* simple transformation block */
6538: PetscInt row, col;
6539: PetscScalar val, norm;
6541: PetscBLASIntCast(size_of_constraint, &Blas_N);
6542: PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
6543: for (j = 0; j < size_of_constraint; j++) {
6544: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
6545: row = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
6546: if (!PetscBTLookup(is_primal, row_B)) {
6547: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6548: MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES);
6549: MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES);
6550: } else {
6551: for (k = 0; k < size_of_constraint; k++) {
6552: col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
6553: if (row != col) {
6554: val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
6555: } else {
6556: val = constraints_data[constraints_data_ptr[total_counts]] / norm;
6557: }
6558: MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES);
6559: }
6560: }
6561: }
6562: if (pcbddc->dbg_flag) PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n");
6563: }
6564: } else {
6565: if (pcbddc->dbg_flag) PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint);
6566: }
6567: }
6569: /* free workspace */
6570: if (qr_needed) {
6571: if (pcbddc->dbg_flag) PetscFree(dbg_work);
6572: PetscFree(trs_rhs);
6573: PetscFree(qr_tau);
6574: PetscFree(qr_work);
6575: PetscFree(gqr_work);
6576: PetscFree(qr_basis);
6577: }
6578: PetscBTDestroy(&is_primal);
6579: MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY);
6580: MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY);
6582: /* assembling of global change of variable */
6583: if (!pcbddc->fake_change) {
6584: Mat tmat;
6585: PetscInt bs;
6587: VecGetSize(pcis->vec1_global, &global_size);
6588: VecGetLocalSize(pcis->vec1_global, &local_size);
6589: MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat);
6590: MatISSetLocalMat(tmat, localChangeOfBasisMatrix);
6591: MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY);
6592: MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY);
6593: MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix);
6594: MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ);
6595: MatGetBlockSize(pc->pmat, &bs);
6596: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs);
6597: MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size);
6598: MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE);
6599: MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix);
6600: MatDestroy(&tmat);
6601: VecSet(pcis->vec1_global, 0.0);
6602: VecSet(pcis->vec1_N, 1.0);
6603: VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
6604: VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
6605: VecReciprocal(pcis->vec1_global);
6606: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL);
6608: /* check */
6609: if (pcbddc->dbg_flag) {
6610: PetscReal error;
6611: Vec x, x_change;
6613: VecDuplicate(pcis->vec1_global, &x);
6614: VecDuplicate(pcis->vec1_global, &x_change);
6615: VecSetRandom(x, NULL);
6616: VecCopy(x, pcis->vec1_global);
6617: VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD);
6618: VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD);
6619: MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N);
6620: VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE);
6621: VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE);
6622: MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change);
6623: VecAXPY(x, -1.0, x_change);
6624: VecNorm(x, NORM_INFINITY, &error);
6626: VecDestroy(&x);
6627: VecDestroy(&x_change);
6628: }
6629: /* adapt sub_schurs computed (if any) */
6630: if (pcbddc->use_deluxe_scaling) {
6631: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6634: if (sub_schurs && sub_schurs->S_Ej_all) {
6635: Mat S_new, tmat;
6636: IS is_all_N, is_V_Sall = NULL;
6638: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N);
6639: MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat);
6640: if (pcbddc->deluxe_zerorows) {
6641: ISLocalToGlobalMapping NtoSall;
6642: IS is_V;
6643: ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V);
6644: ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall);
6645: ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall);
6646: ISLocalToGlobalMappingDestroy(&NtoSall);
6647: ISDestroy(&is_V);
6648: }
6649: ISDestroy(&is_all_N);
6650: MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new);
6651: MatDestroy(&sub_schurs->S_Ej_all);
6652: PetscObjectReference((PetscObject)S_new);
6653: if (pcbddc->deluxe_zerorows) {
6654: const PetscScalar *array;
6655: const PetscInt *idxs_V, *idxs_all;
6656: PetscInt i, n_V;
6658: MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL);
6659: ISGetLocalSize(is_V_Sall, &n_V);
6660: ISGetIndices(is_V_Sall, &idxs_V);
6661: ISGetIndices(sub_schurs->is_Ej_all, &idxs_all);
6662: VecGetArrayRead(pcis->D, &array);
6663: for (i = 0; i < n_V; i++) {
6664: PetscScalar val;
6665: PetscInt idx;
6667: idx = idxs_V[i];
6668: val = array[idxs_all[idxs_V[i]]];
6669: MatSetValue(S_new, idx, idx, val, INSERT_VALUES);
6670: }
6671: MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY);
6672: MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY);
6673: VecRestoreArrayRead(pcis->D, &array);
6674: ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all);
6675: ISRestoreIndices(is_V_Sall, &idxs_V);
6676: }
6677: sub_schurs->S_Ej_all = S_new;
6678: MatDestroy(&S_new);
6679: if (sub_schurs->sum_S_Ej_all) {
6680: MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new);
6681: MatDestroy(&sub_schurs->sum_S_Ej_all);
6682: PetscObjectReference((PetscObject)S_new);
6683: if (pcbddc->deluxe_zerorows) MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL);
6684: sub_schurs->sum_S_Ej_all = S_new;
6685: MatDestroy(&S_new);
6686: }
6687: ISDestroy(&is_V_Sall);
6688: MatDestroy(&tmat);
6689: }
6690: /* destroy any change of basis context in sub_schurs */
6691: if (sub_schurs && sub_schurs->change) {
6692: PetscInt i;
6694: for (i = 0; i < sub_schurs->n_subs; i++) KSPDestroy(&sub_schurs->change[i]);
6695: PetscFree(sub_schurs->change);
6696: }
6697: }
6698: if (pcbddc->switch_static) { /* need to save the local change */
6699: pcbddc->switch_static_change = localChangeOfBasisMatrix;
6700: } else {
6701: MatDestroy(&localChangeOfBasisMatrix);
6702: }
6703: /* determine if any process has changed the pressures locally */
6704: pcbddc->change_interior = pcbddc->benign_have_null;
6705: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6706: MatDestroy(&pcbddc->ConstraintMatrix);
6707: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6708: pcbddc->use_qr_single = qr_needed;
6709: }
6710: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6711: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6712: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6713: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6714: } else {
6715: Mat benign_global = NULL;
6716: if (pcbddc->benign_have_null) {
6717: Mat M;
6719: pcbddc->change_interior = PETSC_TRUE;
6720: VecCopy(matis->counter, pcis->vec1_N);
6721: VecReciprocal(pcis->vec1_N);
6722: MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global);
6723: if (pcbddc->benign_change) {
6724: MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M);
6725: MatDiagonalScale(M, pcis->vec1_N, NULL);
6726: } else {
6727: MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M);
6728: MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES);
6729: }
6730: MatISSetLocalMat(benign_global, M);
6731: MatDestroy(&M);
6732: MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY);
6733: MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY);
6734: }
6735: if (pcbddc->user_ChangeOfBasisMatrix) {
6736: MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix);
6737: MatDestroy(&benign_global);
6738: } else if (pcbddc->benign_have_null) {
6739: pcbddc->ChangeOfBasisMatrix = benign_global;
6740: }
6741: }
6742: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6743: IS is_global;
6744: const PetscInt *gidxs;
6746: ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs);
6747: ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global);
6748: ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs);
6749: MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change);
6750: ISDestroy(&is_global);
6751: }
6752: }
6753: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) VecDuplicate(pcis->vec1_global, &pcbddc->work_change);
6755: if (!pcbddc->fake_change) {
6756: /* add pressure dofs to set of primal nodes for numbering purposes */
6757: for (i = 0; i < pcbddc->benign_n; i++) {
6758: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
6759: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6760: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
6761: pcbddc->local_primal_size_cc++;
6762: pcbddc->local_primal_size++;
6763: }
6765: /* check if a new primal space has been introduced (also take into account benign trick) */
6766: pcbddc->new_primal_space_local = PETSC_TRUE;
6767: if (olocal_primal_size == pcbddc->local_primal_size) {
6768: PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local);
6769: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6770: if (!pcbddc->new_primal_space_local) {
6771: PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local);
6772: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6773: }
6774: }
6775: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6776: MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
6777: }
6778: PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult);
6780: /* flush dbg viewer */
6781: if (pcbddc->dbg_flag) PetscViewerFlush(pcbddc->dbg_viewer);
6783: /* free workspace */
6784: PetscBTDestroy(&qr_needed_idx);
6785: PetscBTDestroy(&change_basis);
6786: if (!pcbddc->adaptive_selection) {
6787: PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n);
6788: PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B);
6789: } else {
6790: PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data);
6791: PetscFree(constraints_n);
6792: PetscFree(constraints_idxs_B);
6793: }
6794: return 0;
6795: }
/*
  PCBDDCAnalyzeInterface - (re)builds the local graph held in pcbddc->mat_graph and runs its
  connected-components analysis.

  Input Parameter:
. pc - the preconditioner; pc->data is read as a PC_BDDC and pc->pmat->data as a Mat_IS

  Notes:
  When pcbddc->recompute_topography is set, the graph is reset, re-initialized from the
  local-to-global mapping and global row count of pc->pmat, optionally given a CSR adjacency
  taken from the row structure of matis->A, optionally given coordinates moved through
  matis->sf, and then set up with PCBDDCGraphSetUp.
  Independently of that, PCBDDCGraphComputeConnectedComponents is called whenever
  pcbddc->graphanalyzed is false.
  The function returns 0 on every visible path; return values of the called routines are not
  inspected in this listing.
*/
6797: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6798: {
6799: ISLocalToGlobalMapping map;
6800: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6801: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
6802: PetscInt i, N;
      /* rcsr: set when the adjacency is requested from matis->A below; triggers the nvtxs_csr reset at the end */
6803: PetscBool rcsr = PETSC_FALSE;
      /* Everything in this branch is skipped unless the topography has been flagged for recomputation */
6805: if (pcbddc->recompute_topography) {
      /* force the connected-components analysis at the bottom of this function to run again */
6806: pcbddc->graphanalyzed = PETSC_FALSE;
6807: /* Reset previously computed graph */
6808: PCBDDCGraphReset(pcbddc->mat_graph);
6809: /* Init local Graph struct */
      /* N is the global number of rows of pc->pmat; map is its row local-to-global mapping */
6810: MatGetSize(pc->pmat, &N, NULL);
6811: MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL);
6812: PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount);
      /* the consistency check (with MPI_LOR) is applied only when local user vertices exist and user_primal_vertices is not set */
6814: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local);
6815: /* Check validity of the csr graph passed in by the user */
      /* NOTE(review): the next line is only the tail of a statement; its beginning is not present in this listing - restore it from the upstream source */
6817: pcbddc->mat_graph->nvtxs);
6819: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6820: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6821: PetscInt *xadj, *adjncy;
6822: PetscInt nvtxs;
6823: PetscBool flg_row = PETSC_FALSE;
      /* row structure of the local matrix matis->A, requested with the "symmetric" argument set to PETSC_TRUE */
6825: MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row);
      /* the adjacency is installed only if MatGetRowIJ reported success through flg_row */
6826: if (flg_row) {
6827: PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES);
6828: pcbddc->computed_rowadj = PETSC_TRUE;
6829: }
6830: MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row);
      /* set unconditionally in this branch, i.e. also when flg_row was false */
6831: rcsr = PETSC_TRUE;
6832: }
6833: if (pcbddc->dbg_flag) PetscViewerFlush(pcbddc->dbg_viewer);
      /* Coordinates are present (cdim nonzero) but not yet flagged as local (cloc false):
         move them through matis->sf into a new array sized cdim times the local row count of matis->A */
6835: if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
6836: PetscReal *lcoords;
6837: PetscInt n;
6838: MPI_Datatype dimrealtype;
6840: /* TODO: support for blocked */
6842: MatGetLocalSize(matis->A, &n, NULL);
6843: PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords);
      /* one datatype element = cdim contiguous MPIU_REAL values, so each broadcast unit is one point */
6844: MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype);
6845: MPI_Type_commit(&dimrealtype);
6846: PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE);
6847: PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE);
6848: MPI_Type_free(&dimrealtype);
      /* the previous coordinate array is released and replaced by the freshly filled one */
6849: PetscFree(pcbddc->mat_graph->coords);
6851: pcbddc->mat_graph->coords = lcoords;
6852: pcbddc->mat_graph->cloc = PETSC_TRUE;
6853: pcbddc->mat_graph->cnloc = n;
6854: }
      /* NOTE(review): the next line is only the tail of a statement; its beginning is not present in this listing - restore it from the upstream source */
6856: pcbddc->mat_graph->nvtxs);
      /* coordinates are marked active only when corner selection is requested, coordinates exist, and corners were not already selected */
6857: pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
6859: /* Setup of Graph */
6860: pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6861: PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local);
6863: /* attach info on disconnected subdomains if present */
6864: if (pcbddc->n_local_subs) {
6865: PetscInt *local_subs, n, totn;
6867: MatGetLocalSize(matis->A, &n, NULL);
6868: PetscMalloc1(n, &local_subs);
      /* default label n_local_subs (one past the last index set) for rows not listed in any pcbddc->local_subs[i] */
6869: for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
      /* rows listed in the i-th index set get label i; a later set overwrites an earlier one on overlap */
6870: for (i = 0; i < pcbddc->n_local_subs; i++) {
6871: const PetscInt *idxs;
6872: PetscInt nl, j;
6874: ISGetLocalSize(pcbddc->local_subs[i], &nl);
6875: ISGetIndices(pcbddc->local_subs[i], &idxs);
6876: for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
6877: ISRestoreIndices(pcbddc->local_subs[i], &idxs);
6878: }
      /* number of labels stored in the graph = largest label in use + 1 (includes the default label when some row kept it) */
6879: for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
6880: pcbddc->mat_graph->n_local_subs = totn + 1;
      /* the array is stored in the graph and is not freed in this function */
6881: pcbddc->mat_graph->local_subs = local_subs;
6882: }
6883: }
      /* Runs after a rebuild (flag cleared above) or whenever the flag is found false on entry */
6885: if (!pcbddc->graphanalyzed) {
6886: /* Graph's connected components analysis */
6887: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
6888: pcbddc->graphanalyzed = PETSC_TRUE;
6889: pcbddc->corner_selected = pcbddc->corner_selection;
6890: }
      /* the CSR vertex count is cleared when the adjacency was requested from matis->A in this call */
6891: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
6892: return 0;
6893: }
6895: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
6896: {
6897: PetscInt i, j, n;
6898: PetscScalar *alphas;
6899: PetscReal norm, *onorms;
6901: n = *nio;
6902: if (!n) return 0;
6903: PetscMalloc2(n, &alphas, n, &onorms);
6904: VecNormalize(vecs[0], &norm);
6905: if (norm < PETSC_SMALL) {
6906: onorms[0] = 0.0;
6907: VecSet(vecs[0], 0.0);
6908: } else {
6909: onorms[0] = norm;
6910: }
6912: for (i = 1; i < n; i++) {
6913: VecMDot(vecs[i], i, vecs, alphas);
6914: for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
6915: VecMAXPY(vecs[i], i, alphas, vecs);
6916: VecNormalize(vecs[i], &norm);
6917: if (norm < PETSC_SMALL) {
6918: onorms[i] = 0.0;
6919: VecSet(vecs[i], 0.0);
6920: } else {
6921: onorms[i] = norm;
6922: }
6923: }
6924: /* push nonzero vectors at the beginning */
6925: for (i = 0; i < n; i++) {
6926: if (onorms[i] == 0.0) {
6927: for (j = i + 1; j < n; j++) {
6928: if (onorms[j] != 0.0) {
6929: VecCopy(vecs[j], vecs[i]);
6930: onorms[j] = 0.0;
6931: }
6932: }
6933: }
6934: }
6935: for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
6936: PetscFree2(alphas, onorms);
6937: return 0;
6938: }
6940: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
6941: {
6942: ISLocalToGlobalMapping mapping;
6943: Mat A;
6944: PetscInt n_neighs, *neighs, *n_shared, **shared;
6945: PetscMPIInt size, rank, color;
6946: PetscInt *xadj, *adjncy;
6947: PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
6948: PetscInt im_active, active_procs, N, n, i, j, threshold = 2;
6949: PetscInt void_procs, *procs_candidates = NULL;
6950: PetscInt xadj_count, *count;
6951: PetscBool ismatis, use_vwgt = PETSC_FALSE;
6952: PetscSubcomm psubcomm;
6953: MPI_Comm subcomm;
6956: PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis);
6962: if (have_void) *have_void = PETSC_FALSE;
6963: MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size);
6964: MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank);
6965: MatISGetLocalMat(mat, &A);
6966: MatGetLocalSize(A, &n, NULL);
6967: im_active = !!n;
6968: MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat));
6969: void_procs = size - active_procs;
6970: /* get ranks of non-active processes in mat communicator */
6971: if (void_procs) {
6972: PetscInt ncand;
6974: if (have_void) *have_void = PETSC_TRUE;
6975: PetscMalloc1(size, &procs_candidates);
6976: MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat));
6977: for (i = 0, ncand = 0; i < size; i++) {
6978: if (!procs_candidates[i]) procs_candidates[ncand++] = i;
6979: }
6980: /* force n_subdomains to be not greater than the number of non-active processes */
6981: *n_subdomains = PetscMin(void_procs, *n_subdomains);
6982: }
6984: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
6985: number of subdomains requested 1 -> send to rank-0 or first candidate in voids */
6986: MatGetSize(mat, &N, NULL);
6987: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
6988: PetscInt issize, isidx, dest;
6989: if (*n_subdomains == 1) dest = 0;
6990: else dest = rank;
6991: if (im_active) {
6992: issize = 1;
6993: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
6994: isidx = procs_candidates[dest];
6995: } else {
6996: isidx = dest;
6997: }
6998: } else {
6999: issize = 0;
7000: isidx = -1;
7001: }
7002: if (*n_subdomains != 1) *n_subdomains = active_procs;
7003: ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends);
7004: PetscFree(procs_candidates);
7005: return 0;
7006: }
7007: PetscOptionsGetBool(NULL, NULL, "-matis_partitioning_use_vwgt", &use_vwgt, NULL);
7008: PetscOptionsGetInt(NULL, NULL, "-matis_partitioning_threshold", &threshold, NULL);
7009: threshold = PetscMax(threshold, 2);
7011: /* Get info on mapping */
7012: MatISGetLocalToGlobalMapping(mat, &mapping, NULL);
7013: ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared);
7015: /* build local CSR graph of subdomains' connectivity */
7016: PetscMalloc1(2, &xadj);
7017: xadj[0] = 0;
7018: xadj[1] = PetscMax(n_neighs - 1, 0);
7019: PetscMalloc1(xadj[1], &adjncy);
7020: PetscMalloc1(xadj[1], &adjncy_wgt);
7021: PetscCalloc1(n, &count);
7022: for (i = 1; i < n_neighs; i++)
7023: for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7025: xadj_count = 0;
7026: for (i = 1; i < n_neighs; i++) {
7027: for (j = 0; j < n_shared[i]; j++) {
7028: if (count[shared[i][j]] < threshold) {
7029: adjncy[xadj_count] = neighs[i];
7030: adjncy_wgt[xadj_count] = n_shared[i];
7031: xadj_count++;
7032: break;
7033: }
7034: }
7035: }
7036: xadj[1] = xadj_count;
7037: PetscFree(count);
7038: ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared);
7039: PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt);
7041: PetscMalloc1(1, &ranks_send_to_idx);
7043: /* Restrict work on active processes only */
7044: PetscMPIIntCast(im_active, &color);
7045: if (void_procs) {
7046: PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm);
7047: PetscSubcommSetNumber(psubcomm, 2); /* 2 groups, active process and not active processes */
7048: PetscSubcommSetTypeGeneral(psubcomm, color, rank);
7049: subcomm = PetscSubcommChild(psubcomm);
7050: } else {
7051: psubcomm = NULL;
7052: subcomm = PetscObjectComm((PetscObject)mat);
7053: }
7055: v_wgt = NULL;
7056: if (!color) {
7057: PetscFree(xadj);
7058: PetscFree(adjncy);
7059: PetscFree(adjncy_wgt);
7060: } else {
7061: Mat subdomain_adj;
7062: IS new_ranks, new_ranks_contig;
7063: MatPartitioning partitioner;
7064: PetscInt rstart = 0, rend = 0;
7065: PetscInt *is_indices, *oldranks;
7066: PetscMPIInt size;
7067: PetscBool aggregate;
7069: MPI_Comm_size(subcomm, &size);
7070: if (void_procs) {
7071: PetscInt prank = rank;
7072: PetscMalloc1(size, &oldranks);
7073: MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm);
7074: for (i = 0; i < xadj[1]; i++) PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]);
7075: PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt);
7076: } else {
7077: oldranks = NULL;
7078: }
7079: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7080: if (aggregate) { /* TODO: all this part could be made more efficient */
7081: PetscInt lrows, row, ncols, *cols;
7082: PetscMPIInt nrank;
7083: PetscScalar *vals;
7085: MPI_Comm_rank(subcomm, &nrank);
7086: lrows = 0;
7087: if (nrank < redprocs) {
7088: lrows = size / redprocs;
7089: if (nrank < size % redprocs) lrows++;
7090: }
7091: MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj);
7092: MatGetOwnershipRange(subdomain_adj, &rstart, &rend);
7093: MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE);
7094: MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
7095: row = nrank;
7096: ncols = xadj[1] - xadj[0];
7097: cols = adjncy;
7098: PetscMalloc1(ncols, &vals);
7099: for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7100: MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES);
7101: MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY);
7102: MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY);
7103: PetscFree(xadj);
7104: PetscFree(adjncy);
7105: PetscFree(adjncy_wgt);
7106: PetscFree(vals);
7107: if (use_vwgt) {
7108: Vec v;
7109: const PetscScalar *array;
7110: PetscInt nl;
7112: MatCreateVecs(subdomain_adj, &v, NULL);
7113: VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES);
7114: VecAssemblyBegin(v);
7115: VecAssemblyEnd(v);
7116: VecGetLocalSize(v, &nl);
7117: VecGetArrayRead(v, &array);
7118: PetscMalloc1(nl, &v_wgt);
7119: for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7120: VecRestoreArrayRead(v, &array);
7121: VecDestroy(&v);
7122: }
7123: } else {
7124: MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj);
7125: if (use_vwgt) {
7126: PetscMalloc1(1, &v_wgt);
7127: v_wgt[0] = n;
7128: }
7129: }
7130: /* MatView(subdomain_adj,0); */
7132: /* Partition */
7133: MatPartitioningCreate(subcomm, &partitioner);
7134: #if defined(PETSC_HAVE_PTSCOTCH)
7135: MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH);
7136: #elif defined(PETSC_HAVE_PARMETIS)
7137: MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS);
7138: #else
7139: MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE);
7140: #endif
7141: MatPartitioningSetAdjacency(partitioner, subdomain_adj);
7142: if (v_wgt) MatPartitioningSetVertexWeights(partitioner, v_wgt);
7143: *n_subdomains = PetscMin((PetscInt)size, *n_subdomains);
7144: MatPartitioningSetNParts(partitioner, *n_subdomains);
7145: MatPartitioningSetFromOptions(partitioner);
7146: MatPartitioningApply(partitioner, &new_ranks);
7147: /* MatPartitioningView(partitioner,0); */
7149: /* renumber new_ranks to avoid "holes" in new set of processors */
7150: ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig);
7151: ISDestroy(&new_ranks);
7152: ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices);
7153: if (!aggregate) {
7154: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7155: PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7156: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7157: } else if (oldranks) {
7158: ranks_send_to_idx[0] = oldranks[is_indices[0]];
7159: } else {
7160: ranks_send_to_idx[0] = is_indices[0];
7161: }
7162: } else {
7163: PetscInt idx = 0;
7164: PetscMPIInt tag;
7165: MPI_Request *reqs;
7167: PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag);
7168: PetscMalloc1(rend - rstart, &reqs);
7169: for (i = rstart; i < rend; i++) MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]);
7170: MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE);
7171: MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE);
7172: PetscFree(reqs);
7173: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7174: PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7175: ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7176: } else if (oldranks) {
7177: ranks_send_to_idx[0] = oldranks[idx];
7178: } else {
7179: ranks_send_to_idx[0] = idx;
7180: }
7181: }
7182: ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices);
7183: /* clean up */
7184: PetscFree(oldranks);
7185: ISDestroy(&new_ranks_contig);
7186: MatDestroy(&subdomain_adj);
7187: MatPartitioningDestroy(&partitioner);
7188: }
7189: PetscSubcommDestroy(&psubcomm);
7190: PetscFree(procs_candidates);
7192: /* assemble parallel IS for sends */
7193: i = 1;
7194: if (!color) i = 0;
7195: ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends);
7196: return 0;
7197: }
/*
  MatTypePrivate - integer tag describing the storage format of a local matrix.

  The tag is cast to PetscInt and placed in the first slot of the index buffer
  exchanged during subassembling, so the numeric values are part of the wire
  format and are therefore spelled out explicitly.
*/
typedef enum {
  MATDENSE_PRIVATE = 0, /* dense local matrix */
  MATAIJ_PRIVATE   = 1, /* AIJ local matrix */
  MATBAIJ_PRIVATE  = 2, /* BAIJ local matrix */
  MATSBAIJ_PRIVATE = 3  /* SBAIJ local matrix */
} MatTypePrivate;
/*
  PCBDDCMatISSubassemble - gathers the local matrices of a MATIS matrix onto the
  processes listed in is_sends and sums them there into the local matrix of a new
  (or reused) MATIS matrix *mat_n.

  Input parameters:
    mat           - the source matrix; its local matrix is read with MatDenseGetArrayRead
                    (NOTE(review): the argument checks that presumably enforce a MATIS
                    input with dense local matrix are not visible in this listing)
    is_sends      - destination ranks (in the communicator of mat) for the local data of
                    this process; if NULL the pattern is computed internally with
                    PCBDDCMatISGetSubassemblingPattern
    n_subdomains  - only used when is_sends is NULL, forwarded to the pattern computation
    restrict_comm - if true, *mat_n is built on a restricted communicator and the processes
                    excluded from it get *mat_n == NULL on return
    restrict_full - selects who is excluded when restrict_comm is true: every process that
                    receives nothing (true), or only processes that send but do not
                    receive (false)
    reuse         - if true and *mat_n is non-NULL, the communicator of *mat_n is reused and
                    its local-to-global mapping is reset; if true and *mat_n is NULL a
                    placeholder MATIS is created
    nis, isarray  - additional index sets to be gathered; each isarray[i] is destroyed and
                    replaced by the sorted, duplicate-free union of the received indices
    nvecs, nnsp_vec - when nvecs is nonzero, nnsp_vec[0] (and only entry 0) is sent along
                    with the matrix, destroyed, and replaced by a new vector holding the sum
                    of the received contributions

  Output parameter:
    mat_n - the subassembled MATIS matrix (NULL on processes excluded from the new comm)

  Wire format of the index message sent to each destination:
    [0]          MatTypePrivate tag (always MATDENSE_PRIVATE here)
    [1]          size of the local-to-global map
    [2 ...]      global indices of the local-to-global map
  The value message carries the raw dense array (size*size scalars).

  Returns 0.
*/
7206: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7207: {
7208: Mat local_mat;
7209: IS is_sends_internal;
7210: PetscInt rows, cols, new_local_rows;
7211: PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7212: PetscBool ismatis, isdense, newisdense, destroy_mat;
7213: ISLocalToGlobalMapping l2gmap;
7214: PetscInt *l2gmap_indices;
7215: const PetscInt *is_indices;
7216: MatType new_local_type;
7217: /* buffers */
7218: PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7219: PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7220: PetscInt *recv_buffer_idxs_local;
7221: PetscScalar *ptr_vals, *recv_buffer_vals;
7222: const PetscScalar *send_buffer_vals;
7223: PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7224: /* MPI */
7225: MPI_Comm comm, comm_n;
7226: PetscSubcomm subcomm;
7227: PetscMPIInt n_sends, n_recvs, size;
7228: PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7229: PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7230: PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7231: MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7232: MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
7235: PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis);
/* NOTE(review): this branch has an empty body in this listing; presumably the argument
   checks on nvecs were elided -- confirm against the upstream source */
7243: if (nvecs) {
7246: }
7247: /* further checks */
7248: MatISGetLocalMat(mat, &local_mat);
7249: PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense);
7251: MatGetSize(local_mat, &rows, &cols);
7253: if (reuse && *mat_n) {
7254: PetscInt mrows, mcols, mnrows, mncols;
7256: PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis);
7258: MatGetSize(mat, &mrows, &mcols);
7259: MatGetSize(*mat_n, &mnrows, &mncols);
7262: }
7263: MatGetBlockSize(local_mat, &bs);
7266: /* prepare IS for sending if not provided */
7267: if (!is_sends) {
7269: PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL);
7270: } else {
/* take a reference so that is_sends_internal can be destroyed unconditionally later */
7271: PetscObjectReference((PetscObject)is_sends);
7272: is_sends_internal = is_sends;
7273: }
7275: /* get comm */
7276: PetscObjectGetComm((PetscObject)mat, &comm);
7278: /* compute number of sends */
7279: ISGetLocalSize(is_sends_internal, &i);
7280: PetscMPIIntCast(i, &n_sends);
7282: /* compute number of receives */
7283: MPI_Comm_size(comm, &size);
7284: PetscMalloc1(size, &iflags);
7285: PetscArrayzero(iflags, size);
/* is_indices stays checked out until all sends have been posted */
7286: ISGetIndices(is_sends_internal, &is_indices);
7287: for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
7288: PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs);
7289: PetscFree(iflags);
7291: /* restrict comm if requested */
7292: subcomm = NULL;
7293: destroy_mat = PETSC_FALSE;
7294: if (restrict_comm) {
7295: PetscMPIInt color, subcommsize;
/* color == 1 marks the processes that will be left out of the new communicator */
7297: color = 0;
7298: if (restrict_full) {
7299: if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7300: } else {
7301: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7302: }
7303: MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm);
7304: subcommsize = size - subcommsize;
7305: /* check if reuse has been requested */
7306: if (reuse) {
7307: if (*mat_n) {
7308: PetscMPIInt subcommsize2;
7309: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2);
7311: comm_n = PetscObjectComm((PetscObject)*mat_n);
7312: } else {
7313: comm_n = PETSC_COMM_SELF;
7314: }
7315: } else { /* MAT_INITIAL_MATRIX */
7316: PetscMPIInt rank;
7318: MPI_Comm_rank(comm, &rank);
7319: PetscSubcommCreate(comm, &subcomm);
7320: PetscSubcommSetNumber(subcomm, 2);
7321: PetscSubcommSetTypeGeneral(subcomm, color, rank);
7322: comm_n = PetscSubcommChild(subcomm);
7323: }
7324: /* flag to destroy *mat_n if not significant */
7325: if (color) destroy_mat = PETSC_TRUE;
7326: } else {
7327: comm_n = comm;
7328: }
7330: /* prepare send/receive buffers */
7331: PetscMalloc1(size, &ilengths_idxs);
7332: PetscArrayzero(ilengths_idxs, size);
7333: PetscMalloc1(size, &ilengths_vals);
7334: PetscArrayzero(ilengths_vals, size);
7335: if (nis) PetscCalloc1(size, &ilengths_idxs_is);
7337: /* Get data from local matrices */
7339: /* TODO: See below some guidelines on how to prepare the local buffers */
7340: /*
7341: send_buffer_vals should contain the raw values of the local matrix
7342: send_buffer_idxs should contain:
7343: - MatType_PRIVATE type
7344: - PetscInt size_of_l2gmap
7345: - PetscInt global_row_indices[size_of_l2gmap]
7346: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7347: */
7348: {
7349: ISLocalToGlobalMapping mapping;
7351: MatISGetLocalToGlobalMapping(mat, &mapping, NULL);
/* the dense array is sent in place; it is handed back only after the value sends complete */
7352: MatDenseGetArrayRead(local_mat, &send_buffer_vals);
7353: ISLocalToGlobalMappingGetSize(mapping, &i);
7354: PetscMalloc1(i + 2, &send_buffer_idxs);
7355: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7356: send_buffer_idxs[1] = i;
7357: ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs);
7358: PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i);
7359: ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs);
7360: PetscMPIIntCast(i, &len);
/* the same two messages (len*len values, len+2 indices) go to every destination */
7361: for (i = 0; i < n_sends; i++) {
7362: ilengths_vals[is_indices[i]] = len * len;
7363: ilengths_idxs[is_indices[i]] = len + 2;
7364: }
7365: }
7366: PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals);
7367: /* additional is (if any) */
7368: if (nis) {
7369: PetscMPIInt psum;
7370: PetscInt j;
/* first pass: size of the packed buffer, one length entry plus the indices for each IS */
7371: for (j = 0, psum = 0; j < nis; j++) {
7372: PetscInt plen;
7373: ISGetLocalSize(isarray[j], &plen);
7374: PetscMPIIntCast(plen, &len);
7375: psum += len + 1; /* indices + length */
7376: }
7377: PetscMalloc1(psum, &send_buffer_idxs_is);
/* second pass: pack [length, indices...] for each IS back to back */
7378: for (j = 0, psum = 0; j < nis; j++) {
7379: PetscInt plen;
7380: const PetscInt *is_array_idxs;
7381: ISGetLocalSize(isarray[j], &plen);
7382: send_buffer_idxs_is[psum] = plen;
7383: ISGetIndices(isarray[j], &is_array_idxs);
7384: PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen);
7385: ISRestoreIndices(isarray[j], &is_array_idxs);
7386: psum += plen + 1; /* indices + length */
7387: }
7388: for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
7389: PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is);
7390: }
7391: MatISRestoreLocalMat(mat, &local_mat);
/* total sizes of the receive buffers, accumulated over all incoming messages */
7393: buf_size_idxs = 0;
7394: buf_size_vals = 0;
7395: buf_size_idxs_is = 0;
7396: buf_size_vecs = 0;
7397: for (i = 0; i < n_recvs; i++) {
7398: buf_size_idxs += (PetscInt)olengths_idxs[i];
7399: buf_size_vals += (PetscInt)olengths_vals[i];
7400: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
/* this counts two entries more per message than the vec receives below actually use */
7401: if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7402: }
7403: PetscMalloc1(buf_size_idxs, &recv_buffer_idxs);
7404: PetscMalloc1(buf_size_vals, &recv_buffer_vals);
7405: PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is);
7406: PetscMalloc1(buf_size_vecs, &recv_buffer_vecs);
7408: /* get new tags for clean communications */
7409: PetscObjectGetNewTag((PetscObject)mat, &tag_idxs);
7410: PetscObjectGetNewTag((PetscObject)mat, &tag_vals);
7411: PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is);
7412: PetscObjectGetNewTag((PetscObject)mat, &tag_vecs);
7414: /* allocate for requests */
7415: PetscMalloc1(n_sends, &send_req_idxs);
7416: PetscMalloc1(n_sends, &send_req_vals);
7417: PetscMalloc1(n_sends, &send_req_idxs_is);
7418: PetscMalloc1(n_sends, &send_req_vecs);
7419: PetscMalloc1(n_recvs, &recv_req_idxs);
7420: PetscMalloc1(n_recvs, &recv_req_vals);
7421: PetscMalloc1(n_recvs, &recv_req_idxs_is);
7422: PetscMalloc1(n_recvs, &recv_req_vecs);
7424: /* communications */
/* post all receives first; each message lands contiguously after the previous one */
7425: ptr_idxs = recv_buffer_idxs;
7426: ptr_vals = recv_buffer_vals;
7427: ptr_idxs_is = recv_buffer_idxs_is;
7428: ptr_vecs = recv_buffer_vecs;
7429: for (i = 0; i < n_recvs; i++) {
7430: source_dest = onodes[i];
7431: MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i]);
7432: MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i]);
7433: ptr_idxs += olengths_idxs[i];
7434: ptr_vals += olengths_vals[i];
7435: if (nis) {
7436: source_dest = onodes_is[i];
7437: MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i]);
7438: ptr_idxs_is += olengths_idxs_is[i];
7439: }
7440: if (nvecs) {
7441: source_dest = onodes[i];
/* the vector message has one scalar per l2gmap entry, i.e. the index message minus its 2-entry header */
7442: MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i]);
7443: ptr_vecs += olengths_idxs[i] - 2;
7444: }
7445: }
7446: for (i = 0; i < n_sends; i++) {
7447: PetscMPIIntCast(is_indices[i], &source_dest);
7448: MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]);
7449: MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]);
7450: if (nis) MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]);
7451: if (nvecs) {
/* NOTE(review): the array is acquired once per destination but restored only once after the
   waits (and never acquired when n_sends is 0); looks balanced only for n_sends == 1 -- confirm */
7452: VecGetArray(nnsp_vec[0], &send_buffer_vecs);
7453: MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]);
7454: }
7455: }
7456: ISRestoreIndices(is_sends_internal, &is_indices);
7457: ISDestroy(&is_sends_internal);
7459: /* assemble new l2g map */
7460: MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE);
/* first pass: upper bound on the number of local rows (duplicates still counted) */
7461: ptr_idxs = recv_buffer_idxs;
7462: new_local_rows = 0;
7463: for (i = 0; i < n_recvs; i++) {
7464: new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
7465: ptr_idxs += olengths_idxs[i];
7466: }
7467: PetscMalloc1(new_local_rows, &l2gmap_indices);
/* second pass: concatenate all received global indices, then sort and drop duplicates */
7468: ptr_idxs = recv_buffer_idxs;
7469: new_local_rows = 0;
7470: for (i = 0; i < n_recvs; i++) {
7471: PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1));
7472: new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
7473: ptr_idxs += olengths_idxs[i];
7474: }
7475: PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices);
7476: ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap);
7477: PetscFree(l2gmap_indices);
7479: /* infer new local matrix type from received local matrices type */
7480: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7481: /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7482: if (n_recvs) {
/* start from the type this process sent; any mismatch among the received tags falls back to AIJ */
7483: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7484: ptr_idxs = recv_buffer_idxs;
7485: for (i = 0; i < n_recvs; i++) {
7486: if ((PetscInt)new_local_type_private != *ptr_idxs) {
7487: new_local_type_private = MATAIJ_PRIVATE;
7488: break;
7489: }
7490: ptr_idxs += olengths_idxs[i];
7491: }
7492: switch (new_local_type_private) {
7493: case MATDENSE_PRIVATE:
7494: new_local_type = MATSEQAIJ;
7495: bs = 1;
7496: break;
7497: case MATAIJ_PRIVATE:
7498: new_local_type = MATSEQAIJ;
7499: bs = 1;
7500: break;
7501: case MATBAIJ_PRIVATE:
7502: new_local_type = MATSEQBAIJ;
7503: break;
7504: case MATSBAIJ_PRIVATE:
7505: new_local_type = MATSEQSBAIJ;
7506: break;
7507: default:
7508: SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
7509: }
7510: } else { /* by default, new_local_type is seqaij */
7511: new_local_type = MATSEQAIJ;
7512: bs = 1;
7513: }
7515: /* create MATIS object if needed */
7516: if (!reuse) {
7517: MatGetSize(mat, &rows, &cols);
7518: MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n);
7519: } else {
7520: /* it also destroys the local matrices */
7521: if (*mat_n) {
7522: MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap);
7523: } else { /* this is a fake object */
/* rows and cols still hold the sizes of the input local matrix here */
7524: MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n);
7525: }
7526: }
7527: MatISGetLocalMat(*mat_n, &local_mat);
7528: MatSetType(local_mat, new_local_type);
7530: MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE);
7532: /* Global to local map of received indices */
/* the whole buffer is mapped, including the two header entries of each message; those
   entries are overwritten with their original values right below */
7533: PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local); /* needed for values insertion */
7534: ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local);
7535: ISLocalToGlobalMappingDestroy(&l2gmap);
7537: /* restore attributes -> type of incoming data and its size */
7538: buf_size_idxs = 0;
7539: for (i = 0; i < n_recvs; i++) {
7540: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7541: recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
7542: buf_size_idxs += (PetscInt)olengths_idxs[i];
7543: }
7544: PetscFree(recv_buffer_idxs);
7546: /* set preallocation */
7547: PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense);
7548: if (!newisdense) {
7549: PetscInt *new_local_nnz = NULL;
7551: ptr_idxs = recv_buffer_idxs_local;
7552: if (n_recvs) PetscCalloc1(new_local_rows, &new_local_nnz);
7553: for (i = 0; i < n_recvs; i++) {
7554: PetscInt j;
7555: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
/* every row touched by a dense block of size s may receive up to s nonzeros from it */
7556: for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
7557: } else {
7558: /* TODO */
7559: }
7560: ptr_idxs += olengths_idxs[i];
7561: }
7562: if (new_local_nnz) {
/* NOTE(review): the three preallocation calls are issued in sequence on the same matrix with
   new_local_nnz rescaled in between; presumably only the call matching the actual local type
   takes effect -- confirm */
7563: for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
7564: MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz);
7565: for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
7566: MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz);
7567: for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
7568: MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz);
7569: } else {
7570: MatSetUp(local_mat);
7571: }
7572: PetscFree(new_local_nnz);
7573: } else {
7574: MatSetUp(local_mat);
7575: }
7577: /* set values */
7578: ptr_vals = recv_buffer_vals;
7579: ptr_idxs = recv_buffer_idxs_local;
7580: for (i = 0; i < n_recvs; i++) {
7581: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
/* row orientation is switched off while the received raw dense array is added, and
   switched back on afterwards */
7582: MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE);
7583: MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES);
7584: MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY);
7585: MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY);
7586: MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE);
7587: } else {
7588: /* TODO */
7589: }
7590: ptr_idxs += olengths_idxs[i];
7591: ptr_vals += olengths_vals[i];
7592: }
7593: MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY);
7594: MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY);
7595: MatISRestoreLocalMat(*mat_n, &local_mat);
7596: MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY);
7597: MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY);
7598: PetscFree(recv_buffer_vals);
7600: #if 0
7601: if (!restrict_comm) { /* check */
7602: Vec lvec,rvec;
7603: PetscReal infty_error;
7605: MatCreateVecs(mat,&rvec,&lvec);
7606: VecSetRandom(rvec,NULL);
7607: MatMult(mat,rvec,lvec);
7608: VecScale(lvec,-1.0);
7609: MatMultAdd(*mat_n,rvec,lvec,lvec);
7610: VecNorm(lvec,NORM_INFINITY,&infty_error);
7611: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7612: VecDestroy(&rvec);
7613: VecDestroy(&lvec);
7614: }
7615: #endif
7617: /* assemble new additional is (if any) */
7618: if (nis) {
7619: PetscInt **temp_idxs, *count_is, j, psum;
7621: MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE);
7622: PetscCalloc1(nis, &count_is);
/* first pass: count how many indices were received for each IS */
7623: ptr_idxs = recv_buffer_idxs_is;
7624: psum = 0;
7625: for (i = 0; i < n_recvs; i++) {
7626: for (j = 0; j < nis; j++) {
7627: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7628: count_is[j] += plen; /* increment counting of buffer for j-th IS */
7629: psum += plen;
7630: ptr_idxs += plen + 1; /* shift pointer to received data */
7631: }
7632: }
/* a single allocation is carved into one contiguous slice per IS */
7633: PetscMalloc1(nis, &temp_idxs);
7634: PetscMalloc1(psum, &temp_idxs[0]);
7635: for (i = 1; i < nis; i++) temp_idxs[i] = temp_idxs[i - 1] + count_is[i - 1];
7636: PetscArrayzero(count_is, nis);
/* second pass: scatter the received indices into the slice of their IS */
7637: ptr_idxs = recv_buffer_idxs_is;
7638: for (i = 0; i < n_recvs; i++) {
7639: for (j = 0; j < nis; j++) {
7640: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7641: PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen);
7642: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7643: ptr_idxs += plen + 1; /* shift pointer to received data */
7644: }
7645: }
/* replace each input IS with the sorted, duplicate-free set of received indices on comm_n */
7646: for (i = 0; i < nis; i++) {
7647: ISDestroy(&isarray[i]);
7648: PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]);
7649: ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]);
7650: }
7651: PetscFree(count_is);
7652: PetscFree(temp_idxs[0]);
7653: PetscFree(temp_idxs);
7654: }
7655: /* free workspace */
7656: PetscFree(recv_buffer_idxs_is);
/* send buffers are released only after the corresponding sends have completed */
7657: MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE);
7658: PetscFree(send_buffer_idxs);
7659: MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE);
/* NOTE(review): the dense array was acquired unconditionally above but is restored only
   when the local matrix is MATSEQDENSE -- confirm that other types cannot reach this point */
7660: if (isdense) {
7661: MatISGetLocalMat(mat, &local_mat);
7662: MatDenseRestoreArrayRead(local_mat, &send_buffer_vals);
7663: MatISRestoreLocalMat(mat, &local_mat);
7664: } else {
7665: /* PetscFree(send_buffer_vals); */
7666: }
7667: if (nis) {
7668: MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE);
7669: PetscFree(send_buffer_idxs_is);
7670: }
7672: if (nvecs) {
7673: MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE);
7674: MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE);
7675: VecRestoreArray(nnsp_vec[0], &send_buffer_vecs);
/* the caller's vector is replaced by a new one sized to the new local rows, created on comm_n */
7676: VecDestroy(&nnsp_vec[0]);
7677: VecCreate(comm_n, &nnsp_vec[0]);
7678: VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE);
7679: VecSetType(nnsp_vec[0], VECSTANDARD);
7680: /* set values */
7681: ptr_vals = recv_buffer_vecs;
7682: ptr_idxs = recv_buffer_idxs_local;
/* send_buffer_vecs is reused here as the write pointer into the new vector;
   received contributions are summed at their mapped local positions */
7683: VecGetArray(nnsp_vec[0], &send_buffer_vecs);
7684: for (i = 0; i < n_recvs; i++) {
7685: PetscInt j;
7686: for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
7687: ptr_idxs += olengths_idxs[i];
7688: ptr_vals += olengths_idxs[i] - 2;
7689: }
7690: VecRestoreArray(nnsp_vec[0], &send_buffer_vecs);
7691: VecAssemblyBegin(nnsp_vec[0]);
7692: VecAssemblyEnd(nnsp_vec[0]);
7693: }
7695: PetscFree(recv_buffer_vecs);
7696: PetscFree(recv_buffer_idxs_local);
7697: PetscFree(recv_req_idxs);
7698: PetscFree(recv_req_vals);
7699: PetscFree(recv_req_vecs);
7700: PetscFree(recv_req_idxs_is);
7701: PetscFree(send_req_idxs);
7702: PetscFree(send_req_vals);
7703: PetscFree(send_req_vecs);
7704: PetscFree(send_req_idxs_is);
7705: PetscFree(ilengths_vals);
7706: PetscFree(ilengths_idxs);
7707: PetscFree(olengths_vals);
7708: PetscFree(olengths_idxs);
7709: PetscFree(onodes);
7710: if (nis) {
7711: PetscFree(ilengths_idxs_is);
7712: PetscFree(olengths_idxs_is);
7713: PetscFree(onodes_is);
7714: }
7715: PetscSubcommDestroy(&subcomm);
7716: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
7717: MatDestroy(mat_n);
7718: for (i = 0; i < nis; i++) ISDestroy(&isarray[i]);
7719: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7720: VecDestroy(&nnsp_vec[0]);
7721: }
7722: *mat_n = NULL;
7723: }
7724: return 0;
7725: }
7727: /* temporary hack into ksp private data structure */
7728: #include <petsc/private/kspimpl.h>
7730: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, PetscScalar *coarse_submat_vals)
7731: {
7732: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
7733: PC_IS *pcis = (PC_IS *)pc->data;
7734: Mat coarse_mat, coarse_mat_is, coarse_submat_dense;
7735: Mat coarsedivudotp = NULL;
7736: Mat coarseG, t_coarse_mat_is;
7737: MatNullSpace CoarseNullSpace = NULL;
7738: ISLocalToGlobalMapping coarse_islg;
7739: IS coarse_is, *isarray, corners;
7740: PetscInt i, im_active = -1, active_procs = -1;
7741: PetscInt nis, nisdofs, nisneu, nisvert;
7742: PetscInt coarse_eqs_per_proc;
7743: PC pc_temp;
7744: PCType coarse_pc_type;
7745: KSPType coarse_ksp_type;
7746: PetscBool multilevel_requested, multilevel_allowed;
7747: PetscBool coarse_reuse;
7748: PetscInt ncoarse, nedcfield;
7749: PetscBool compute_vecs = PETSC_FALSE;
7750: PetscScalar *array;
7751: MatReuse coarse_mat_reuse;
7752: PetscBool restr, full_restr, have_void;
7753: PetscMPIInt size;
7755: PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0);
7756: /* Assign global numbering to coarse dofs */
7757: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7758: PetscInt ocoarse_size;
7759: compute_vecs = PETSC_TRUE;
7761: pcbddc->new_primal_space = PETSC_TRUE;
7762: ocoarse_size = pcbddc->coarse_size;
7763: PetscFree(pcbddc->global_primal_indices);
7764: PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices);
7765: /* see if we can avoid some work */
7766: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7767: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7768: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7769: KSPReset(pcbddc->coarse_ksp);
7770: coarse_reuse = PETSC_FALSE;
7771: } else { /* we can safely reuse already computed coarse matrix */
7772: coarse_reuse = PETSC_TRUE;
7773: }
7774: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7775: coarse_reuse = PETSC_FALSE;
7776: }
7777: /* reset any subassembling information */
7778: if (!coarse_reuse || pcbddc->recompute_topography) ISDestroy(&pcbddc->coarse_subassembling);
7779: } else { /* primal space is unchanged, so we can reuse coarse matrix */
7780: coarse_reuse = PETSC_TRUE;
7781: }
7782: if (coarse_reuse && pcbddc->coarse_ksp) {
7783: KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL);
7784: PetscObjectReference((PetscObject)coarse_mat);
7785: coarse_mat_reuse = MAT_REUSE_MATRIX;
7786: } else {
7787: coarse_mat = NULL;
7788: coarse_mat_reuse = MAT_INITIAL_MATRIX;
7789: }
7791: /* creates temporary l2gmap and IS for coarse indexes */
7792: ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is);
7793: ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg);
7795: /* creates temporary MATIS object for coarse matrix */
7796: MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_submat_dense);
7797: MatCreateIS(PetscObjectComm((PetscObject)pc), 1, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size, coarse_islg, coarse_islg, &t_coarse_mat_is);
7798: MatISSetLocalMat(t_coarse_mat_is, coarse_submat_dense);
7799: MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY);
7800: MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY);
7801: MatDestroy(&coarse_submat_dense);
7803: /* count "active" (i.e. with positive local size) and "void" processes */
7804: im_active = !!(pcis->n);
7805: MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));
7807: /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
7808: /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
7809: /* full_restr : just use the receivers from the subassembling pattern */
7810: MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
7811: coarse_mat_is = NULL;
7812: multilevel_allowed = PETSC_FALSE;
7813: multilevel_requested = PETSC_FALSE;
7814: coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
7815: if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
7816: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7817: if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
7818: if (multilevel_requested) {
7819: ncoarse = active_procs / pcbddc->coarsening_ratio;
7820: restr = PETSC_FALSE;
7821: full_restr = PETSC_FALSE;
7822: } else {
7823: ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
7824: restr = PETSC_TRUE;
7825: full_restr = PETSC_TRUE;
7826: }
7827: if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7828: ncoarse = PetscMax(1, ncoarse);
7829: if (!pcbddc->coarse_subassembling) {
7830: if (pcbddc->coarsening_ratio > 1) {
7831: if (multilevel_requested) {
7832: PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void);
7833: } else {
7834: PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void);
7835: }
7836: } else {
7837: PetscMPIInt rank;
7839: MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
7840: have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
7841: ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling);
7842: }
7843: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7844: PetscInt psum;
7845: if (pcbddc->coarse_ksp) psum = 1;
7846: else psum = 0;
7847: MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));
7848: have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
7849: }
7850: /* determine if we can go multilevel */
7851: if (multilevel_requested) {
7852: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7853: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
7854: }
7855: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
7857: /* dump subassembling pattern */
7858: if (pcbddc->dbg_flag && multilevel_allowed) ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer);
7859: /* compute dofs splitting and neumann boundaries for coarse dofs */
7860: nedcfield = -1;
7861: corners = NULL;
7862: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
7863: PetscInt *tidxs, *tidxs2, nout, tsize, i;
7864: const PetscInt *idxs;
7865: ISLocalToGlobalMapping tmap;
7867: /* create map between primal indices (in local representative ordering) and local primal numbering */
7868: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap);
7869: /* allocate space for temporary storage */
7870: PetscMalloc1(pcbddc->local_primal_size, &tidxs);
7871: PetscMalloc1(pcbddc->local_primal_size, &tidxs2);
7872: /* allocate for IS array */
7873: nisdofs = pcbddc->n_ISForDofsLocal;
7874: if (pcbddc->nedclocal) {
7875: if (pcbddc->nedfield > -1) {
7876: nedcfield = pcbddc->nedfield;
7877: } else {
7878: nedcfield = 0;
7880: nisdofs = 1;
7881: }
7882: }
7883: nisneu = !!pcbddc->NeumannBoundariesLocal;
7884: nisvert = 0; /* nisvert is not used */
7885: nis = nisdofs + nisneu + nisvert;
7886: PetscMalloc1(nis, &isarray);
7887: /* dofs splitting */
7888: for (i = 0; i < nisdofs; i++) {
7889: /* ISView(pcbddc->ISForDofsLocal[i],0); */
7890: if (nedcfield != i) {
7891: ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize);
7892: ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs);
7893: ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs);
7894: ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs);
7895: } else {
7896: ISGetLocalSize(pcbddc->nedclocal, &tsize);
7897: ISGetIndices(pcbddc->nedclocal, &idxs);
7898: ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs);
7900: ISRestoreIndices(pcbddc->nedclocal, &idxs);
7901: }
7902: ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2);
7903: ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]);
7904: /* ISView(isarray[i],0); */
7905: }
7906: /* neumann boundaries */
7907: if (pcbddc->NeumannBoundariesLocal) {
7908: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
7909: ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize);
7910: ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs);
7911: ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs);
7912: ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs);
7913: ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2);
7914: ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]);
7915: /* ISView(isarray[nisdofs],0); */
7916: }
7917: /* coordinates */
7918: if (pcbddc->corner_selected) {
7919: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners);
7920: ISGetLocalSize(corners, &tsize);
7921: ISGetIndices(corners, &idxs);
7922: ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs);
7924: ISRestoreIndices(corners, &idxs);
7925: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners);
7926: ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2);
7927: ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners);
7928: }
7929: PetscFree(tidxs);
7930: PetscFree(tidxs2);
7931: ISLocalToGlobalMappingDestroy(&tmap);
7932: } else {
7933: nis = 0;
7934: nisdofs = 0;
7935: nisneu = 0;
7936: nisvert = 0;
7937: isarray = NULL;
7938: }
7939: /* destroy no longer needed map */
7940: ISLocalToGlobalMappingDestroy(&coarse_islg);
7942: /* subassemble */
7943: if (multilevel_allowed) {
7944: Vec vp[1];
7945: PetscInt nvecs = 0;
7946: PetscBool reuse, reuser;
7948: if (coarse_mat) reuse = PETSC_TRUE;
7949: else reuse = PETSC_FALSE;
7950: MPIU_Allreduce(&reuse, &reuser, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
7951: vp[0] = NULL;
7952: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
7953: VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]);
7954: VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE);
7955: VecSetType(vp[0], VECSTANDARD);
7956: nvecs = 1;
7958: if (pcbddc->divudotp) {
7959: Mat B, loc_divudotp;
7960: Vec v, p;
7961: IS dummy;
7962: PetscInt np;
7964: MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp);
7965: MatGetSize(loc_divudotp, &np, NULL);
7966: ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy);
7967: MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B);
7968: MatCreateVecs(B, &v, &p);
7969: VecSet(p, 1.);
7970: MatMultTranspose(B, p, v);
7971: VecDestroy(&p);
7972: MatDestroy(&B);
7973: VecGetArray(vp[0], &array);
7974: VecPlaceArray(pcbddc->vec1_P, array);
7975: MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P);
7976: VecResetArray(pcbddc->vec1_P);
7977: VecRestoreArray(vp[0], &array);
7978: ISDestroy(&dummy);
7979: VecDestroy(&v);
7980: }
7981: }
7982: if (reuser) {
7983: PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp);
7984: } else {
7985: PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp);
7986: }
7987: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
7988: PetscScalar *arraym;
7989: const PetscScalar *arrayv;
7990: PetscInt nl;
7991: VecGetLocalSize(vp[0], &nl);
7992: MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp);
7993: MatDenseGetArray(coarsedivudotp, &arraym);
7994: VecGetArrayRead(vp[0], &arrayv);
7995: PetscArraycpy(arraym, arrayv, nl);
7996: VecRestoreArrayRead(vp[0], &arrayv);
7997: MatDenseRestoreArray(coarsedivudotp, &arraym);
7998: VecDestroy(&vp[0]);
7999: } else {
8000: MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp);
8001: }
8002: } else {
8003: PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL);
8004: }
8005: if (coarse_mat_is || coarse_mat) {
8006: if (!multilevel_allowed) {
8007: MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat);
8008: } else {
8009: /* if this matrix is present, it means we are not reusing the coarse matrix */
8010: if (coarse_mat_is) {
8012: PetscObjectReference((PetscObject)coarse_mat_is);
8013: coarse_mat = coarse_mat_is;
8014: }
8015: }
8016: }
8017: MatDestroy(&t_coarse_mat_is);
8018: MatDestroy(&coarse_mat_is);
8020: /* create local to global scatters for coarse problem */
8021: if (compute_vecs) {
8022: PetscInt lrows;
8023: VecDestroy(&pcbddc->coarse_vec);
8024: if (coarse_mat) {
8025: MatGetLocalSize(coarse_mat, &lrows, NULL);
8026: } else {
8027: lrows = 0;
8028: }
8029: VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec);
8030: VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE);
8031: VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8032: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8033: VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob);
8034: }
8035: ISDestroy(&coarse_is);
8037: /* set defaults for coarse KSP and PC */
8038: if (multilevel_allowed) {
8039: coarse_ksp_type = KSPRICHARDSON;
8040: coarse_pc_type = PCBDDC;
8041: } else {
8042: coarse_ksp_type = KSPPREONLY;
8043: coarse_pc_type = PCREDUNDANT;
8044: }
8046: /* print some info if requested */
8047: if (pcbddc->dbg_flag) {
8048: if (!multilevel_allowed) {
8049: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
8050: if (multilevel_requested) {
8051: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, pcbddc->coarsening_ratio);
8052: } else if (pcbddc->max_levels) {
8053: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels);
8054: }
8055: PetscViewerFlush(pcbddc->dbg_viewer);
8056: }
8057: }
8059: /* communicate coarse discrete gradient */
8060: coarseG = NULL;
8061: if (pcbddc->nedcG && multilevel_allowed) {
8062: MPI_Comm ccomm;
8063: if (coarse_mat) {
8064: ccomm = PetscObjectComm((PetscObject)coarse_mat);
8065: } else {
8066: ccomm = MPI_COMM_NULL;
8067: }
8068: MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG);
8069: }
8071: /* create the coarse KSP object only once with defaults */
8072: if (coarse_mat) {
8073: PetscBool isredundant, isbddc, force, valid;
8074: PetscViewer dbg_viewer = NULL;
8075: PetscBool isset, issym, isher, isspd;
8077: if (pcbddc->dbg_flag) {
8078: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8079: PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level);
8080: }
8081: if (!pcbddc->coarse_ksp) {
8082: char prefix[256], str_level[16];
8083: size_t len;
8085: KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp);
8086: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure);
8087: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1);
8088: KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1);
8089: KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat);
8090: KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type);
8091: KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE);
8092: KSPGetPC(pcbddc->coarse_ksp, &pc_temp);
8093: /* TODO is this logic correct? should check for coarse_mat type */
8094: PCSetType(pc_temp, coarse_pc_type);
8095: /* prefix */
8096: PetscStrcpy(prefix, "");
8097: PetscStrcpy(str_level, "");
8098: if (!pcbddc->current_level) {
8099: PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix));
8100: PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix));
8101: } else {
8102: PetscStrlen(((PetscObject)pc)->prefix, &len);
8103: if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */
8104: if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8105: /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8106: PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1);
8107: PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level));
8108: PetscStrlcat(prefix, str_level, sizeof(prefix));
8109: }
8110: KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix);
8111: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8112: PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1);
8113: PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio);
8114: PCBDDCSetLevels(pc_temp, pcbddc->max_levels);
8115: /* allow user customization */
8116: KSPSetFromOptions(pcbddc->coarse_ksp);
8117: /* get some info after set from options */
8118: KSPGetPC(pcbddc->coarse_ksp, &pc_temp);
8119: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8120: force = PETSC_FALSE;
8121: PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL);
8122: PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "");
8123: PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc);
8124: if (multilevel_allowed && !force && !valid) {
8125: isbddc = PETSC_TRUE;
8126: PCSetType(pc_temp, PCBDDC);
8127: PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1);
8128: PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio);
8129: PCBDDCSetLevels(pc_temp, pcbddc->max_levels);
8130: if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8131: PetscObjectOptionsBegin((PetscObject)pc_temp);
8132: (*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject);
8133: PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject);
8134: PetscOptionsEnd();
8135: pc_temp->setfromoptionscalled++;
8136: }
8137: }
8138: }
8139: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8140: KSPGetPC(pcbddc->coarse_ksp, &pc_temp);
8141: if (nisdofs) {
8142: PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray);
8143: for (i = 0; i < nisdofs; i++) ISDestroy(&isarray[i]);
8144: }
8145: if (nisneu) {
8146: PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]);
8147: ISDestroy(&isarray[nisdofs]);
8148: }
8149: if (nisvert) {
8150: PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]);
8151: ISDestroy(&isarray[nis - 1]);
8152: }
8153: if (coarseG) PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE);
8155: /* get some info after set from options */
8156: PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc);
8158: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8159: if (isbddc && !multilevel_allowed) PCSetType(pc_temp, coarse_pc_type);
8160: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8161: force = PETSC_FALSE;
8162: PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL);
8163: PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "");
8164: if (multilevel_requested && multilevel_allowed && !valid && !force) PCSetType(pc_temp, PCBDDC);
8165: PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant);
8166: if (isredundant) {
8167: KSP inner_ksp;
8168: PC inner_pc;
8170: PCRedundantGetKSP(pc_temp, &inner_ksp);
8171: KSPGetPC(inner_ksp, &inner_pc);
8172: }
8174: /* parameters which miss an API */
8175: PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc);
8176: if (isbddc) {
8177: PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8179: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8180: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8181: pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit;
8182: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8183: if (pcbddc_coarse->benign_saddle_point) {
8184: Mat coarsedivudotp_is;
8185: ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8186: IS row, col;
8187: const PetscInt *gidxs;
8188: PetscInt n, st, M, N;
8190: MatGetSize(coarsedivudotp, &n, NULL);
8191: MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat));
8192: st = st - n;
8193: ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row);
8194: MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL);
8195: ISLocalToGlobalMappingGetSize(l2gmap, &n);
8196: ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs);
8197: ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col);
8198: ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs);
8199: ISLocalToGlobalMappingCreateIS(row, &rl2g);
8200: ISLocalToGlobalMappingCreateIS(col, &cl2g);
8201: ISGetSize(row, &M);
8202: MatGetSize(coarse_mat, &N, NULL);
8203: ISDestroy(&row);
8204: ISDestroy(&col);
8205: MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is);
8206: MatSetType(coarsedivudotp_is, MATIS);
8207: MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N);
8208: MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g);
8209: ISLocalToGlobalMappingDestroy(&rl2g);
8210: ISLocalToGlobalMappingDestroy(&cl2g);
8211: MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp);
8212: MatDestroy(&coarsedivudotp);
8213: PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL);
8214: MatDestroy(&coarsedivudotp_is);
8215: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8216: if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8217: }
8218: }
8220: /* propagate symmetry info of coarse matrix */
8221: MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE);
8222: MatIsSymmetricKnown(pc->pmat, &isset, &issym);
8223: if (isset) MatSetOption(coarse_mat, MAT_SYMMETRIC, issym);
8224: MatIsHermitianKnown(pc->pmat, &isset, &isher);
8225: if (isset) MatSetOption(coarse_mat, MAT_HERMITIAN, isher);
8226: MatIsSPDKnown(pc->pmat, &isset, &isspd);
8227: if (isset) MatSetOption(coarse_mat, MAT_SPD, isspd);
8229: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE);
8230: /* set operators */
8231: MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view");
8232: MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix);
8233: KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat);
8234: if (pcbddc->dbg_flag) PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level);
8235: }
8236: MatDestroy(&coarseG);
8237: PetscFree(isarray);
8238: #if 0
8239: {
8240: PetscViewer viewer;
8241: char filename[256];
8242: sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8243: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8244: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8245: MatView(coarse_mat,viewer);
8246: PetscViewerPopFormat(viewer);
8247: PetscViewerDestroy(&viewer);
8248: }
8249: #endif
8251: if (corners) {
8252: Vec gv;
8253: IS is;
8254: const PetscInt *idxs;
8255: PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8256: PetscScalar *coords;
8259: VecGetSize(pcbddc->coarse_vec, &N);
8260: VecGetLocalSize(pcbddc->coarse_vec, &n);
8261: VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv);
8262: VecSetBlockSize(gv, cdim);
8263: VecSetSizes(gv, n * cdim, N * cdim);
8264: VecSetType(gv, VECSTANDARD);
8265: VecSetFromOptions(gv);
8266: VecSet(gv, PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */
8268: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is);
8269: ISGetLocalSize(is, &n);
8270: ISGetIndices(is, &idxs);
8271: PetscMalloc1(n * cdim, &coords);
8272: for (i = 0; i < n; i++) {
8273: for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
8274: }
8275: ISRestoreIndices(is, &idxs);
8276: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is);
8278: ISGetLocalSize(corners, &n);
8279: ISGetIndices(corners, &idxs);
8280: VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES);
8281: ISRestoreIndices(corners, &idxs);
8282: PetscFree(coords);
8283: VecAssemblyBegin(gv);
8284: VecAssemblyEnd(gv);
8285: VecGetArray(gv, &coords);
8286: if (pcbddc->coarse_ksp) {
8287: PC coarse_pc;
8288: PetscBool isbddc;
8290: KSPGetPC(pcbddc->coarse_ksp, &coarse_pc);
8291: PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc);
8292: if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8293: PetscReal *realcoords;
8295: VecGetLocalSize(gv, &n);
8296: #if defined(PETSC_USE_COMPLEX)
8297: PetscMalloc1(n, &realcoords);
8298: for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
8299: #else
8300: realcoords = coords;
8301: #endif
8302: PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords);
8303: #if defined(PETSC_USE_COMPLEX)
8304: PetscFree(realcoords);
8305: #endif
8306: }
8307: }
8308: VecRestoreArray(gv, &coords);
8309: VecDestroy(&gv);
8310: }
8311: ISDestroy(&corners);
8313: if (pcbddc->coarse_ksp) {
8314: Vec crhs, csol;
8316: KSPGetSolution(pcbddc->coarse_ksp, &csol);
8317: KSPGetRhs(pcbddc->coarse_ksp, &crhs);
8318: if (!csol) MatCreateVecs(coarse_mat, &((pcbddc->coarse_ksp)->vec_sol), NULL);
8319: if (!crhs) MatCreateVecs(coarse_mat, NULL, &((pcbddc->coarse_ksp)->vec_rhs));
8320: }
8321: MatDestroy(&coarsedivudotp);
8323: /* compute null space for coarse solver if the benign trick has been requested */
8324: if (pcbddc->benign_null) {
8325: VecSet(pcbddc->vec1_P, 0.);
8326: for (i = 0; i < pcbddc->benign_n; i++) VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES);
8327: VecAssemblyBegin(pcbddc->vec1_P);
8328: VecAssemblyEnd(pcbddc->vec1_P);
8329: VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD);
8330: VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD);
8331: if (coarse_mat) {
8332: Vec nullv;
8333: PetscScalar *array, *array2;
8334: PetscInt nl;
8336: MatCreateVecs(coarse_mat, &nullv, NULL);
8337: VecGetLocalSize(nullv, &nl);
8338: VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array);
8339: VecGetArray(nullv, &array2);
8340: PetscArraycpy(array2, array, nl);
8341: VecRestoreArray(nullv, &array2);
8342: VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array);
8343: VecNormalize(nullv, NULL);
8344: MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace);
8345: VecDestroy(&nullv);
8346: }
8347: }
8348: PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0);
8350: PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0);
8351: if (pcbddc->coarse_ksp) {
8352: PetscBool ispreonly;
8354: if (CoarseNullSpace) {
8355: PetscBool isnull;
8357: MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull);
8358: if (isnull) MatSetNullSpace(coarse_mat, CoarseNullSpace);
8359: /* TODO: add local nullspaces (if any) */
8360: }
8361: /* setup coarse ksp */
8362: KSPSetUp(pcbddc->coarse_ksp);
8363: /* Check coarse problem if in debug mode or if solving with an iterative method */
8364: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly);
8365: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
8366: KSP check_ksp;
8367: KSPType check_ksp_type;
8368: PC check_pc;
8369: Vec check_vec, coarse_vec;
8370: PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
8371: PetscInt its;
8372: PetscBool compute_eigs;
8373: PetscReal *eigs_r, *eigs_c;
8374: PetscInt neigs;
8375: const char *prefix;
8377: /* Create ksp object suitable for estimation of extreme eigenvalues */
8378: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp);
8379: PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0);
8380: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE);
8381: KSPSetOperators(check_ksp, coarse_mat, coarse_mat);
8382: KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size);
8383: /* prevent from setup unneeded object */
8384: KSPGetPC(check_ksp, &check_pc);
8385: PCSetType(check_pc, PCNONE);
8386: if (ispreonly) {
8387: check_ksp_type = KSPPREONLY;
8388: compute_eigs = PETSC_FALSE;
8389: } else {
8390: check_ksp_type = KSPGMRES;
8391: compute_eigs = PETSC_TRUE;
8392: }
8393: KSPSetType(check_ksp, check_ksp_type);
8394: KSPSetComputeSingularValues(check_ksp, compute_eigs);
8395: KSPSetComputeEigenvalues(check_ksp, compute_eigs);
8396: KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1);
8397: KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix);
8398: KSPSetOptionsPrefix(check_ksp, prefix);
8399: KSPAppendOptionsPrefix(check_ksp, "check_");
8400: KSPSetFromOptions(check_ksp);
8401: KSPSetUp(check_ksp);
8402: KSPGetPC(pcbddc->coarse_ksp, &check_pc);
8403: KSPSetPC(check_ksp, check_pc);
8404: /* create random vec */
8405: MatCreateVecs(coarse_mat, &coarse_vec, &check_vec);
8406: VecSetRandom(check_vec, NULL);
8407: MatMult(coarse_mat, check_vec, coarse_vec);
8408: /* solve coarse problem */
8409: KSPSolve(check_ksp, coarse_vec, coarse_vec);
8410: KSPCheckSolve(check_ksp, pc, coarse_vec);
8411: /* set eigenvalue estimation if preonly has not been requested */
8412: if (compute_eigs) {
8413: PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r);
8414: PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c);
8415: KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs);
8416: if (neigs) {
8417: lambda_max = eigs_r[neigs - 1];
8418: lambda_min = eigs_r[0];
8419: if (pcbddc->use_coarse_estimates) {
8420: if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8421: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min);
8422: KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min));
8423: }
8424: }
8425: }
8426: }
8428: /* check coarse problem residual error */
8429: if (pcbddc->dbg_flag) {
8430: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8431: PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1));
8432: VecAXPY(check_vec, -1.0, coarse_vec);
8433: VecNorm(check_vec, NORM_INFINITY, &infty_error);
8434: MatMult(coarse_mat, check_vec, coarse_vec);
8435: VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error);
8436: PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates);
8437: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp), dbg_viewer);
8438: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer);
8439: PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error);
8440: PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error);
8441: if (CoarseNullSpace) PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n");
8442: if (compute_eigs) {
8443: PetscReal lambda_max_s, lambda_min_s;
8444: KSPConvergedReason reason;
8445: KSPGetType(check_ksp, &check_ksp_type);
8446: KSPGetIterationNumber(check_ksp, &its);
8447: KSPGetConvergedReason(check_ksp, &reason);
8448: KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s);
8449: PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s);
8450: for (i = 0; i < neigs; i++) PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]);
8451: }
8452: PetscViewerFlush(dbg_viewer);
8453: PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1));
8454: }
8455: VecDestroy(&check_vec);
8456: VecDestroy(&coarse_vec);
8457: KSPDestroy(&check_ksp);
8458: if (compute_eigs) {
8459: PetscFree(eigs_r);
8460: PetscFree(eigs_c);
8461: }
8462: }
8463: }
8464: MatNullSpaceDestroy(&CoarseNullSpace);
8465: /* print additional info */
8466: if (pcbddc->dbg_flag) {
8467: /* waits until all processes reaches this point */
8468: PetscBarrier((PetscObject)pc);
8469: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level);
8470: PetscViewerFlush(pcbddc->dbg_viewer);
8471: }
8473: /* free memory */
8474: MatDestroy(&coarse_mat);
8475: PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0);
8476: return 0;
8477: }
8479: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
8480: {
8481: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8482: PC_IS *pcis = (PC_IS *)pc->data;
8483: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
8484: IS subset, subset_mult, subset_n;
8485: PetscInt local_size, coarse_size = 0;
8486: PetscInt *local_primal_indices = NULL;
8487: const PetscInt *t_local_primal_indices;
8489: /* Compute global number of coarse dofs */
8491: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n);
8492: ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset);
8493: ISDestroy(&subset_n);
8494: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult);
8495: ISRenumber(subset, subset_mult, &coarse_size, &subset_n);
8496: ISDestroy(&subset);
8497: ISDestroy(&subset_mult);
8498: ISGetLocalSize(subset_n, &local_size);
8500: PetscMalloc1(local_size, &local_primal_indices);
8501: ISGetIndices(subset_n, &t_local_primal_indices);
8502: PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size);
8503: ISRestoreIndices(subset_n, &t_local_primal_indices);
8504: ISDestroy(&subset_n);
8506: /* check numbering */
8507: if (pcbddc->dbg_flag) {
8508: PetscScalar coarsesum, *array, *array2;
8509: PetscInt i;
8510: PetscBool set_error = PETSC_FALSE, set_error_reduced = PETSC_FALSE;
8512: PetscViewerFlush(pcbddc->dbg_viewer);
8513: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
8514: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse indices\n");
8515: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8516: /* counter */
8517: VecSet(pcis->vec1_global, 0.0);
8518: VecSet(pcis->vec1_N, 1.0);
8519: VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8520: VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8521: VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD);
8522: VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD);
8523: VecSet(pcis->vec1_N, 0.0);
8524: for (i = 0; i < pcbddc->local_primal_size; i++) VecSetValue(pcis->vec1_N, pcbddc->primal_indices_local_idxs[i], 1.0, INSERT_VALUES);
8525: VecAssemblyBegin(pcis->vec1_N);
8526: VecAssemblyEnd(pcis->vec1_N);
8527: VecSet(pcis->vec1_global, 0.0);
8528: VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8529: VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8530: VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD);
8531: VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD);
8532: VecGetArray(pcis->vec1_N, &array);
8533: VecGetArray(pcis->vec2_N, &array2);
8534: for (i = 0; i < pcis->n; i++) {
8535: if (array[i] != 0.0 && array[i] != array2[i]) {
8536: PetscInt owned = (PetscInt)PetscRealPart(array[i]), gi;
8537: PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8538: set_error = PETSC_TRUE;
8539: ISLocalToGlobalMappingApply(pcis->mapping, 1, &i, &gi);
8540: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n", PetscGlobalRank, i, gi, owned, neigh);
8541: }
8542: }
8543: VecRestoreArray(pcis->vec2_N, &array2);
8544: MPIU_Allreduce(&set_error, &set_error_reduced, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
8545: PetscViewerFlush(pcbddc->dbg_viewer);
8546: for (i = 0; i < pcis->n; i++) {
8547: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0 / PetscRealPart(array[i]);
8548: }
8549: VecRestoreArray(pcis->vec1_N, &array);
8550: VecSet(pcis->vec1_global, 0.0);
8551: VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8552: VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8553: VecSum(pcis->vec1_global, &coarsesum);
8554: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT " (%lf)\n", coarse_size, (double)PetscRealPart(coarsesum));
8555: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8556: PetscInt *gidxs;
8558: PetscMalloc1(pcbddc->local_primal_size, &gidxs);
8559: ISLocalToGlobalMappingApply(pcis->mapping, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, gidxs);
8560: PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Distribution of local primal indices\n");
8561: PetscViewerFlush(pcbddc->dbg_viewer);
8562: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d\n", PetscGlobalRank);
8563: for (i = 0; i < pcbddc->local_primal_size; i++) {
8564: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, local_primal_indices[i], pcbddc->primal_indices_local_idxs[i], gidxs[i]);
8565: }
8566: PetscViewerFlush(pcbddc->dbg_viewer);
8567: PetscFree(gidxs);
8568: }
8569: PetscViewerFlush(pcbddc->dbg_viewer);
8570: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8572: }
8574: /* get back data */
8575: *coarse_size_n = coarse_size;
8576: *local_primal_indices_n = local_primal_indices;
8577: return 0;
8578: }
8580: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
8581: {
8582: IS localis_t;
8583: PetscInt i, lsize, *idxs, n;
8584: PetscScalar *vals;
8586: /* get indices in local ordering exploiting local to global map */
8587: ISGetLocalSize(globalis, &lsize);
8588: PetscMalloc1(lsize, &vals);
8589: for (i = 0; i < lsize; i++) vals[i] = 1.0;
8590: ISGetIndices(globalis, (const PetscInt **)&idxs);
8591: VecSet(gwork, 0.0);
8592: VecSet(lwork, 0.0);
8593: if (idxs) { /* multilevel guard */
8594: VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE);
8595: VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES);
8596: }
8597: VecAssemblyBegin(gwork);
8598: ISRestoreIndices(globalis, (const PetscInt **)&idxs);
8599: PetscFree(vals);
8600: VecAssemblyEnd(gwork);
8601: /* now compute set in local ordering */
8602: VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD);
8603: VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD);
8604: VecGetArrayRead(lwork, (const PetscScalar **)&vals);
8605: VecGetSize(lwork, &n);
8606: for (i = 0, lsize = 0; i < n; i++) {
8607: if (PetscRealPart(vals[i]) > 0.5) lsize++;
8608: }
8609: PetscMalloc1(lsize, &idxs);
8610: for (i = 0, lsize = 0; i < n; i++) {
8611: if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
8612: }
8613: VecRestoreArrayRead(lwork, (const PetscScalar **)&vals);
8614: ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t);
8615: *localis = localis_t;
8616: return 0;
8617: }
8619: PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
8620: {
8621: PC_IS *pcis = (PC_IS *)pc->data;
8622: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8623: PC_IS *pcisf;
8624: PC_BDDC *pcbddcf;
8625: PC pcf;
8627: PCCreate(PetscObjectComm((PetscObject)pc), &pcf);
8628: PCSetOperators(pcf, pc->mat, pc->pmat);
8629: PCSetType(pcf, PCBDDC);
8631: pcisf = (PC_IS *)pcf->data;
8632: pcbddcf = (PC_BDDC *)pcf->data;
8634: pcisf->is_B_local = pcis->is_B_local;
8635: pcisf->vec1_N = pcis->vec1_N;
8636: pcisf->BtoNmap = pcis->BtoNmap;
8637: pcisf->n = pcis->n;
8638: pcisf->n_B = pcis->n_B;
8640: PetscFree(pcbddcf->mat_graph);
8641: PetscFree(pcbddcf->sub_schurs);
8642: pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph;
8643: pcbddcf->sub_schurs = schurs;
8644: pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE;
8645: pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
8646: pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
8647: pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin;
8648: pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax;
8649: pcbddcf->use_faces = PETSC_TRUE;
8650: pcbddcf->use_change_of_basis = (PetscBool)!constraints;
8651: pcbddcf->use_change_on_faces = (PetscBool)!constraints;
8652: pcbddcf->use_qr_single = (PetscBool)!constraints;
8653: pcbddcf->fake_change = PETSC_TRUE;
8654: pcbddcf->dbg_flag = pcbddc->dbg_flag;
8656: PCBDDCAdaptiveSelection(pcf);
8657: PCBDDCConstraintsSetUp(pcf);
8659: *change = pcbddcf->ConstraintMatrix;
8660: if (change_primal) ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal);
8661: if (change_primal_mult) ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult);
8662: if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
8664: if (schurs) pcbddcf->sub_schurs = NULL;
8665: pcbddcf->ConstraintMatrix = NULL;
8666: pcbddcf->mat_graph = NULL;
8667: pcisf->is_B_local = NULL;
8668: pcisf->vec1_N = NULL;
8669: pcisf->BtoNmap = NULL;
8670: PCDestroy(&pcf);
8671: return 0;
8672: }
8674: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8675: {
8676: PC_IS *pcis = (PC_IS *)pc->data;
8677: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8678: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
8679: Mat S_j;
8680: PetscInt *used_xadj, *used_adjncy;
8681: PetscBool free_used_adj;
8683: PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0);
8684: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8685: free_used_adj = PETSC_FALSE;
8686: if (pcbddc->sub_schurs_layers == -1) {
8687: used_xadj = NULL;
8688: used_adjncy = NULL;
8689: } else {
8690: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8691: used_xadj = pcbddc->mat_graph->xadj;
8692: used_adjncy = pcbddc->mat_graph->adjncy;
8693: } else if (pcbddc->computed_rowadj) {
8694: used_xadj = pcbddc->mat_graph->xadj;
8695: used_adjncy = pcbddc->mat_graph->adjncy;
8696: } else {
8697: PetscBool flg_row = PETSC_FALSE;
8698: const PetscInt *xadj, *adjncy;
8699: PetscInt nvtxs;
8701: MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row);
8702: if (flg_row) {
8703: PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy);
8704: PetscArraycpy(used_xadj, xadj, nvtxs + 1);
8705: PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]);
8706: free_used_adj = PETSC_TRUE;
8707: } else {
8708: pcbddc->sub_schurs_layers = -1;
8709: used_xadj = NULL;
8710: used_adjncy = NULL;
8711: }
8712: MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row);
8713: }
8714: }
8716: /* setup sub_schurs data */
8717: MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j);
8718: if (!sub_schurs->schur_explicit) {
8719: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8720: MatSchurComplementSetKSP(S_j, pcbddc->ksp_D);
8721: PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL);
8722: } else {
8723: Mat change = NULL;
8724: Vec scaling = NULL;
8725: IS change_primal = NULL, iP;
8726: PetscInt benign_n;
8727: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8728: PetscBool need_change = PETSC_FALSE;
8729: PetscBool discrete_harmonic = PETSC_FALSE;
8731: if (!pcbddc->use_vertices && reuse_solvers) {
8732: PetscInt n_vertices;
8734: ISGetLocalSize(sub_schurs->is_vertices, &n_vertices);
8735: reuse_solvers = (PetscBool)!n_vertices;
8736: }
8737: if (!pcbddc->benign_change_explicit) {
8738: benign_n = pcbddc->benign_n;
8739: } else {
8740: benign_n = 0;
8741: }
8742: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8743: We need a global reduction to avoid possible deadlocks.
8744: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8745: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8746: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8747: MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
8748: need_change = (PetscBool)(!need_change);
8749: }
8750: /* If the user defines additional constraints, we import them here */
8751: if (need_change) {
8753: PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr);
8754: }
8755: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
8757: PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP);
8758: if (iP) {
8759: PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
8760: PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL);
8761: PetscOptionsEnd();
8762: }
8763: if (discrete_harmonic) {
8764: Mat A;
8765: MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A);
8766: MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL);
8767: PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP);
8768: PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
8769: pcbddc->benign_zerodiag_subs, change, change_primal));
8770: MatDestroy(&A);
8771: } else {
8772: PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
8773: pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
8774: }
8775: MatDestroy(&change);
8776: ISDestroy(&change_primal);
8777: }
8778: MatDestroy(&S_j);
8780: /* free adjacency */
8781: if (free_used_adj) PetscFree2(used_xadj, used_adjncy);
8782: PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0);
8783: return 0;
8784: }
8786: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8787: {
8788: PC_IS *pcis = (PC_IS *)pc->data;
8789: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8790: PCBDDCGraph graph;
8792: /* attach interface graph for determining subsets */
8793: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8794: IS verticesIS, verticescomm;
8795: PetscInt vsize, *idxs;
8797: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS);
8798: ISGetSize(verticesIS, &vsize);
8799: ISGetIndices(verticesIS, (const PetscInt **)&idxs);
8800: ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm);
8801: ISRestoreIndices(verticesIS, (const PetscInt **)&idxs);
8802: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS);
8803: PCBDDCGraphCreate(&graph);
8804: PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount);
8805: PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm);
8806: ISDestroy(&verticescomm);
8807: PCBDDCGraphComputeConnectedComponents(graph);
8808: } else {
8809: graph = pcbddc->mat_graph;
8810: }
8811: /* print some info */
8812: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8813: IS vertices;
8814: PetscInt nv, nedges, nfaces;
8815: PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer);
8816: PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices);
8817: ISGetSize(vertices, &nv);
8818: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8819: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n");
8820: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices);
8821: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges);
8822: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces);
8823: PetscViewerFlush(pcbddc->dbg_viewer);
8824: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8825: PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices);
8826: }
8828: /* sub_schurs init */
8829: if (!pcbddc->sub_schurs) PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
8830: PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE);
8832: /* free graph struct */
8833: if (pcbddc->sub_schurs_rebuild) PCBDDCGraphDestroy(&graph);
8834: return 0;
8835: }
8837: PetscErrorCode PCBDDCCheckOperator(PC pc)
8838: {
8839: PC_IS *pcis = (PC_IS *)pc->data;
8840: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8842: if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8843: IS zerodiag = NULL;
8844: Mat S_j, B0_B = NULL;
8845: Vec dummy_vec = NULL, vec_check_B, vec_scale_P;
8846: PetscScalar *p0_check, *array, *array2;
8847: PetscReal norm;
8848: PetscInt i;
8850: /* B0 and B0_B */
8851: if (zerodiag) {
8852: IS dummy;
8854: ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &dummy);
8855: MatCreateSubMatrix(pcbddc->benign_B0, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B);
8856: MatCreateVecs(B0_B, NULL, &dummy_vec);
8857: ISDestroy(&dummy);
8858: }
8859: /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
8860: VecDuplicate(pcbddc->vec1_P, &vec_scale_P);
8861: VecSet(pcbddc->vec1_P, 1.0);
8862: VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD);
8863: VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD);
8864: VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE);
8865: VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE);
8866: VecReciprocal(vec_scale_P);
8867: /* S_j */
8868: MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j);
8869: MatSchurComplementSetKSP(S_j, pcbddc->ksp_D);
8871: /* mimic vector in \widetilde{W}_\Gamma */
8872: VecSetRandom(pcis->vec1_N, NULL);
8873: /* continuous in primal space */
8874: VecSetRandom(pcbddc->coarse_vec, NULL);
8875: VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE);
8876: VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE);
8877: VecGetArray(pcbddc->vec1_P, &array);
8878: PetscCalloc1(pcbddc->benign_n, &p0_check);
8879: for (i = 0; i < pcbddc->benign_n; i++) p0_check[i] = array[pcbddc->local_primal_size - pcbddc->benign_n + i];
8880: VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES);
8881: VecRestoreArray(pcbddc->vec1_P, &array);
8882: VecAssemblyBegin(pcis->vec1_N);
8883: VecAssemblyEnd(pcis->vec1_N);
8884: VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD);
8885: VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD);
8886: VecDuplicate(pcis->vec2_B, &vec_check_B);
8887: VecCopy(pcis->vec2_B, vec_check_B);
8889: /* assemble rhs for coarse problem */
8890: /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8891: /* local with Schur */
8892: MatMult(S_j, pcis->vec2_B, pcis->vec1_B);
8893: if (zerodiag) {
8894: VecGetArray(dummy_vec, &array);
8895: for (i = 0; i < pcbddc->benign_n; i++) array[i] = p0_check[i];
8896: VecRestoreArray(dummy_vec, &array);
8897: MatMultTransposeAdd(B0_B, dummy_vec, pcis->vec1_B, pcis->vec1_B);
8898: }
8899: /* sum on primal nodes the local contributions */
8900: VecScatterBegin(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE);
8901: VecScatterEnd(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE);
8902: VecGetArray(pcis->vec1_N, &array);
8903: VecGetArray(pcbddc->vec1_P, &array2);
8904: for (i = 0; i < pcbddc->local_primal_size; i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8905: VecRestoreArray(pcbddc->vec1_P, &array2);
8906: VecRestoreArray(pcis->vec1_N, &array);
8907: VecSet(pcbddc->coarse_vec, 0.);
8908: VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD);
8909: VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD);
8910: VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE);
8911: VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE);
8912: VecGetArray(pcbddc->vec1_P, &array);
8913: /* scale primal nodes (BDDC sums contibutions) */
8914: VecPointwiseMult(pcbddc->vec1_P, vec_scale_P, pcbddc->vec1_P);
8915: VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES);
8916: VecRestoreArray(pcbddc->vec1_P, &array);
8917: VecAssemblyBegin(pcis->vec1_N);
8918: VecAssemblyEnd(pcis->vec1_N);
8919: VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
8920: VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
8921: /* global: \widetilde{B0}_B w_\Gamma */
8922: if (zerodiag) {
8923: MatMult(B0_B, pcis->vec2_B, dummy_vec);
8924: VecGetArray(dummy_vec, &array);
8925: for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = array[i];
8926: VecRestoreArray(dummy_vec, &array);
8927: }
8928: /* BDDC */
8929: VecSet(pcis->vec1_D, 0.);
8930: PCBDDCApplyInterfacePreconditioner(pc, PETSC_FALSE);
8932: VecCopy(pcis->vec1_B, pcis->vec2_B);
8933: VecAXPY(pcis->vec1_B, -1.0, vec_check_B);
8934: VecNorm(pcis->vec1_B, NORM_INFINITY, &norm);
8935: PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC local error is %1.4e\n", PetscGlobalRank, (double)norm);
8936: for (i = 0; i < pcbddc->benign_n; i++) PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n", PetscGlobalRank, i, (double)PetscAbsScalar(pcbddc->benign_p0[i] - p0_check[i]));
8937: PetscFree(p0_check);
8938: VecDestroy(&vec_scale_P);
8939: VecDestroy(&vec_check_B);
8940: VecDestroy(&dummy_vec);
8941: MatDestroy(&S_j);
8942: MatDestroy(&B0_B);
8943: }
8944: return 0;
8945: }
8947: #include <../src/mat/impls/aij/mpi/mpiaij.h>
8948: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8949: {
8950: Mat At;
8951: IS rows;
8952: PetscInt rst, ren;
8953: PetscLayout rmap;
8955: rst = ren = 0;
8956: if (ccomm != MPI_COMM_NULL) {
8957: PetscLayoutCreate(ccomm, &rmap);
8958: PetscLayoutSetSize(rmap, A->rmap->N);
8959: PetscLayoutSetBlockSize(rmap, 1);
8960: PetscLayoutSetUp(rmap);
8961: PetscLayoutGetRange(rmap, &rst, &ren);
8962: }
8963: ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows);
8964: MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At);
8965: ISDestroy(&rows);
8967: if (ccomm != MPI_COMM_NULL) {
8968: Mat_MPIAIJ *a, *b;
8969: IS from, to;
8970: Vec gvec;
8971: PetscInt lsize;
8973: MatCreate(ccomm, B);
8974: MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N);
8975: MatSetType(*B, MATAIJ);
8976: PetscLayoutDestroy(&((*B)->rmap));
8977: PetscLayoutSetUp((*B)->cmap);
8978: a = (Mat_MPIAIJ *)At->data;
8979: b = (Mat_MPIAIJ *)(*B)->data;
8980: MPI_Comm_size(ccomm, &b->size);
8981: MPI_Comm_rank(ccomm, &b->rank);
8982: PetscObjectReference((PetscObject)a->A);
8983: PetscObjectReference((PetscObject)a->B);
8984: b->A = a->A;
8985: b->B = a->B;
8987: b->donotstash = a->donotstash;
8988: b->roworiented = a->roworiented;
8989: b->rowindices = NULL;
8990: b->rowvalues = NULL;
8991: b->getrowactive = PETSC_FALSE;
8993: (*B)->rmap = rmap;
8994: (*B)->factortype = A->factortype;
8995: (*B)->assembled = PETSC_TRUE;
8996: (*B)->insertmode = NOT_SET_VALUES;
8997: (*B)->preallocated = PETSC_TRUE;
8999: if (a->colmap) {
9000: #if defined(PETSC_USE_CTABLE)
9001: PetscTableCreateCopy(a->colmap, &b->colmap);
9002: #else
9003: PetscMalloc1(At->cmap->N, &b->colmap);
9004: PetscArraycpy(b->colmap, a->colmap, At->cmap->N);
9005: #endif
9006: } else b->colmap = NULL;
9007: if (a->garray) {
9008: PetscInt len;
9009: len = a->B->cmap->n;
9010: PetscMalloc1(len + 1, &b->garray);
9011: if (len) PetscArraycpy(b->garray, a->garray, len);
9012: } else b->garray = NULL;
9014: PetscObjectReference((PetscObject)a->lvec);
9015: b->lvec = a->lvec;
9017: /* cannot use VecScatterCopy */
9018: VecGetLocalSize(b->lvec, &lsize);
9019: ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from);
9020: ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to);
9021: MatCreateVecs(*B, &gvec, NULL);
9022: VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx);
9023: ISDestroy(&from);
9024: ISDestroy(&to);
9025: VecDestroy(&gvec);
9026: }
9027: MatDestroy(&At);
9028: return 0;
9029: }