Actual source code: bddcprivate.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <petsc/private/pcbddcimpl.h>
  3: #include <petsc/private/pcbddcprivateimpl.h>
  4: #include <../src/mat/impls/dense/seq/dense.h>
  5: #include <petscdmplex.h>
  6: #include <petscblaslapack.h>
  7: #include <petsc/private/sfimpl.h>
  8: #include <petsc/private/dmpleximpl.h>
  9: #include <petscdmda.h>

 11: static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);

 13: /* if range is true,  it returns B s.t. span{B} = range(A)
 14:    if range is false, it returns B s.t. range(B) _|_ range(A) */
 15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 16: {
 17:   PetscScalar *uwork, *data, *U, ds = 0.;
 18:   PetscReal   *sing;
 19:   PetscBLASInt bM, bN, lwork, lierr, di = 1;
 20:   PetscInt     ulw, i, nr, nc, n;
 21: #if defined(PETSC_USE_COMPLEX)
 22:   PetscReal *rwork2;
 23: #endif

 25:   MatGetSize(A, &nr, &nc);
 26:   if (!nr || !nc) return 0;

 28:   /* workspace */
 29:   if (!work) {
 30:     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
 31:     PetscMalloc1(ulw, &uwork);
 32:   } else {
 33:     ulw   = lw;
 34:     uwork = work;
 35:   }
 36:   n = PetscMin(nr, nc);
 37:   if (!rwork) {
 38:     PetscMalloc1(n, &sing);
 39:   } else {
 40:     sing = rwork;
 41:   }

 43:   /* SVD */
 44:   PetscMalloc1(nr * nr, &U);
 45:   PetscBLASIntCast(nr, &bM);
 46:   PetscBLASIntCast(nc, &bN);
 47:   PetscBLASIntCast(ulw, &lwork);
 48:   MatDenseGetArray(A, &data);
 49:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 50: #if !defined(PETSC_USE_COMPLEX)
 51:   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
 52: #else
 53:   PetscMalloc1(5 * n, &rwork2);
 54:   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
 55:   PetscFree(rwork2);
 56: #endif
 57:   PetscFPTrapPop();
 59:   MatDenseRestoreArray(A, &data);
 60:   for (i = 0; i < n; i++)
 61:     if (sing[i] < PETSC_SMALL) break;
 62:   if (!rwork) PetscFree(sing);
 63:   if (!work) PetscFree(uwork);
 64:   /* create B */
 65:   if (!range) {
 66:     MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B);
 67:     MatDenseGetArray(*B, &data);
 68:     PetscArraycpy(data, U + nr * i, (nr - i) * nr);
 69:   } else {
 70:     MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B);
 71:     MatDenseGetArray(*B, &data);
 72:     PetscArraycpy(data, U, i * nr);
 73:   }
 74:   MatDenseRestoreArray(*B, &data);
 75:   PetscFree(U);
 76:   return 0;
 77: }

 79: /* TODO REMOVE */
 80: #if defined(PRINT_GDET)
 81: static int inc = 0;
 82: static int lev = 0;
 83: #endif

 85: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 86: {
 87:   Mat          GE, GEd;
 88:   PetscInt     rsize, csize, esize;
 89:   PetscScalar *ptr;

 91:   ISGetSize(edge, &esize);
 92:   if (!esize) return 0;
 93:   ISGetSize(extrow, &rsize);
 94:   ISGetSize(extcol, &csize);

 96:   /* gradients */
 97:   ptr = work + 5 * esize;
 98:   MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE);
 99:   MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins);
100:   MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins);
101:   MatDestroy(&GE);

103:   /* constants */
104:   ptr += rsize * csize;
105:   MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd);
106:   MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE);
107:   MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd);
108:   MatDestroy(&GE);
109:   MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins);
110:   MatDestroy(&GEd);

112:   if (corners) {
113:     Mat                GEc;
114:     const PetscScalar *vals;
115:     PetscScalar        v;

117:     MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc);
118:     MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd);
119:     MatDenseGetArrayRead(GEd, &vals);
120:     /* v    = PetscAbsScalar(vals[0]) */;
121:     v        = 1.;
122:     cvals[0] = vals[0] / v;
123:     cvals[1] = vals[1] / v;
124:     MatDenseRestoreArrayRead(GEd, &vals);
125:     MatScale(*GKins, 1. / v);
126: #if defined(PRINT_GDET)
127:     {
128:       PetscViewer viewer;
129:       char        filename[256];
130:       sprintf(filename, "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++);
131:       PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer);
132:       PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB);
133:       PetscObjectSetName((PetscObject)GEc, "GEc");
134:       MatView(GEc, viewer);
135:       PetscObjectSetName((PetscObject)(*GKins), "GK");
136:       MatView(*GKins, viewer);
137:       PetscObjectSetName((PetscObject)GEd, "Gproj");
138:       MatView(GEd, viewer);
139:       PetscViewerDestroy(&viewer);
140:     }
141: #endif
142:     MatDestroy(&GEd);
143:     MatDestroy(&GEc);
144:   }

146:   return 0;
147: }

149: PetscErrorCode PCBDDCNedelecSupport(PC pc)
150: {
151:   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
152:   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
153:   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
154:   Vec                    tvec;
155:   PetscSF                sfv;
156:   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
157:   MPI_Comm               comm;
158:   IS                     lned, primals, allprimals, nedfieldlocal;
159:   IS                    *eedges, *extrows, *extcols, *alleedges;
160:   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
161:   PetscScalar           *vals, *work;
162:   PetscReal             *rwork;
163:   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
164:   PetscInt               ne, nv, Lv, order, n, field;
165:   PetscInt               n_neigh, *neigh, *n_shared, **shared;
166:   PetscInt               i, j, extmem, cum, maxsize, nee;
167:   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
168:   PetscInt              *sfvleaves, *sfvroots;
169:   PetscInt              *corners, *cedges;
170:   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
171:   PetscInt              *emarks;
172:   PetscBool              print, eerr, done, lrc[2], conforming, global, singular, setprimal;

174:   /* If the discrete gradient is defined for a subset of dofs and global is true,
175:      it assumes G is given in global ordering for all the dofs.
176:      Otherwise, the ordering is global for the Nedelec field */
177:   order      = pcbddc->nedorder;
178:   conforming = pcbddc->conforming;
179:   field      = pcbddc->nedfield;
180:   global     = pcbddc->nedglobal;
181:   setprimal  = PETSC_FALSE;
182:   print      = PETSC_FALSE;
183:   singular   = PETSC_FALSE;

185:   /* Command line customization */
186:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
187:   PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL);
188:   PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL);
189:   PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL);
190:   /* print debug info TODO: to be removed */
191:   PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL);
192:   PetscOptionsEnd();

194:   /* Return if there are no edges in the decomposition and the problem is not singular */
195:   MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL);
196:   ISLocalToGlobalMappingGetSize(al2g, &n);
197:   PetscObjectGetComm((PetscObject)pc, &comm);
198:   if (!singular) {
199:     VecGetArrayRead(matis->counter, (const PetscScalar **)&vals);
200:     lrc[0] = PETSC_FALSE;
201:     for (i = 0; i < n; i++) {
202:       if (PetscRealPart(vals[i]) > 2.) {
203:         lrc[0] = PETSC_TRUE;
204:         break;
205:       }
206:     }
207:     VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals);
208:     MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm);
209:     if (!lrc[1]) return 0;
210:   }

212:   /* Get Nedelec field */
214:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
215:     PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
216:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
217:     ISGetLocalSize(nedfieldlocal, &ne);
218:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
219:     ne            = n;
220:     nedfieldlocal = NULL;
221:     global        = PETSC_TRUE;
222:   } else if (field == PETSC_DECIDE) {
223:     PetscInt rst, ren, *idx;

225:     PetscArrayzero(matis->sf_leafdata, n);
226:     PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n);
227:     MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren);
228:     for (i = rst; i < ren; i++) {
229:       PetscInt nc;

231:       MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL);
232:       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
233:       MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL);
234:     }
235:     PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
236:     PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
237:     PetscMalloc1(n, &idx);
238:     for (i = 0, ne = 0; i < n; i++)
239:       if (matis->sf_leafdata[i]) idx[ne++] = i;
240:     ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal);
241:   } else {
242:     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
243:   }

245:   /* Sanity checks */

250:   /* Just set primal dofs and return */
251:   if (setprimal) {
252:     IS        enedfieldlocal;
253:     PetscInt *eidxs;

255:     PetscMalloc1(ne, &eidxs);
256:     VecGetArrayRead(matis->counter, (const PetscScalar **)&vals);
257:     if (nedfieldlocal) {
258:       ISGetIndices(nedfieldlocal, &idxs);
259:       for (i = 0, cum = 0; i < ne; i++) {
260:         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
261:       }
262:       ISRestoreIndices(nedfieldlocal, &idxs);
263:     } else {
264:       for (i = 0, cum = 0; i < ne; i++) {
265:         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
266:       }
267:     }
268:     VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals);
269:     ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal);
270:     PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal);
271:     PetscFree(eidxs);
272:     ISDestroy(&nedfieldlocal);
273:     ISDestroy(&enedfieldlocal);
274:     return 0;
275:   }

277:   /* Compute some l2g maps */
278:   if (nedfieldlocal) {
279:     IS is;

281:     /* need to map from the local Nedelec field to local numbering */
282:     ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g);
283:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
284:     ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is);
285:     ISLocalToGlobalMappingCreateIS(is, &al2g);
286:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
287:     if (global) {
288:       PetscObjectReference((PetscObject)al2g);
289:       el2g = al2g;
290:     } else {
291:       IS gis;

293:       ISRenumber(is, NULL, NULL, &gis);
294:       ISLocalToGlobalMappingCreateIS(gis, &el2g);
295:       ISDestroy(&gis);
296:     }
297:     ISDestroy(&is);
298:   } else {
299:     /* restore default */
300:     pcbddc->nedfield = -1;
301:     /* one ref for the destruction of al2g, one for el2g */
302:     PetscObjectReference((PetscObject)al2g);
303:     PetscObjectReference((PetscObject)al2g);
304:     el2g = al2g;
305:     fl2g = NULL;
306:   }

308:   /* Start communication to drop connections for interior edges (for cc analysis only) */
309:   PetscArrayzero(matis->sf_leafdata, n);
310:   PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n);
311:   if (nedfieldlocal) {
312:     ISGetIndices(nedfieldlocal, &idxs);
313:     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
314:     ISRestoreIndices(nedfieldlocal, &idxs);
315:   } else {
316:     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
317:   }
318:   PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM);
319:   PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM);

321:   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
322:     MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G);
323:     MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE);
324:     if (global) {
325:       PetscInt rst;

327:       MatGetOwnershipRange(G, &rst, NULL);
328:       for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
329:         if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
330:       }
331:       MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE);
332:       MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL);
333:     } else {
334:       PetscInt *tbz;

336:       PetscMalloc1(ne, &tbz);
337:       PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
338:       PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
339:       ISGetIndices(nedfieldlocal, &idxs);
340:       for (i = 0, cum = 0; i < ne; i++)
341:         if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
342:       ISRestoreIndices(nedfieldlocal, &idxs);
343:       ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz);
344:       MatZeroRows(G, cum, tbz, 0., NULL, NULL);
345:       PetscFree(tbz);
346:     }
347:   } else { /* we need the entire G to infer the nullspace */
348:     PetscObjectReference((PetscObject)pcbddc->discretegradient);
349:     G = pcbddc->discretegradient;
350:   }

352:   /* Extract subdomain relevant rows of G */
353:   ISLocalToGlobalMappingGetIndices(el2g, &idxs);
354:   ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned);
355:   MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall);
356:   ISLocalToGlobalMappingRestoreIndices(el2g, &idxs);
357:   ISDestroy(&lned);
358:   MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis);
359:   MatDestroy(&lGall);
360:   MatISGetLocalMat(lGis, &lG);

362:   /* SF for nodal dofs communications */
363:   MatGetLocalSize(G, NULL, &Lv);
364:   MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g);
365:   PetscObjectReference((PetscObject)vl2g);
366:   ISLocalToGlobalMappingGetSize(vl2g, &nv);
367:   PetscSFCreate(comm, &sfv);
368:   ISLocalToGlobalMappingGetIndices(vl2g, &idxs);
369:   PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs);
370:   ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs);
371:   i = singular ? 2 : 1;
372:   PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots);

374:   /* Destroy temporary G created in MATIS format and modified G */
375:   PetscObjectReference((PetscObject)lG);
376:   MatDestroy(&lGis);
377:   MatDestroy(&G);

379:   if (print) {
380:     PetscObjectSetName((PetscObject)lG, "initial_lG");
381:     MatView(lG, NULL);
382:   }

384:   /* Save lG for values insertion in change of basis */
385:   MatDuplicate(lG, MAT_COPY_VALUES, &lGinit);

387:   /* Analyze the edge-nodes connections (duplicate lG) */
388:   MatDuplicate(lG, MAT_COPY_VALUES, &lGe);
389:   MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE);
390:   PetscBTCreate(nv, &btv);
391:   PetscBTCreate(ne, &bte);
392:   PetscBTCreate(ne, &btb);
393:   PetscBTCreate(ne, &btbd);
394:   PetscBTCreate(nv, &btvcand);
395:   /* need to import the boundary specification to ensure the
396:      proper detection of coarse edges' endpoints */
397:   if (pcbddc->DirichletBoundariesLocal) {
398:     IS is;

400:     if (fl2g) {
401:       ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is);
402:     } else {
403:       is = pcbddc->DirichletBoundariesLocal;
404:     }
405:     ISGetLocalSize(is, &cum);
406:     ISGetIndices(is, &idxs);
407:     for (i = 0; i < cum; i++) {
408:       if (idxs[i] >= 0) {
409:         PetscBTSet(btb, idxs[i]);
410:         PetscBTSet(btbd, idxs[i]);
411:       }
412:     }
413:     ISRestoreIndices(is, &idxs);
414:     if (fl2g) ISDestroy(&is);
415:   }
416:   if (pcbddc->NeumannBoundariesLocal) {
417:     IS is;

419:     if (fl2g) {
420:       ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is);
421:     } else {
422:       is = pcbddc->NeumannBoundariesLocal;
423:     }
424:     ISGetLocalSize(is, &cum);
425:     ISGetIndices(is, &idxs);
426:     for (i = 0; i < cum; i++) {
427:       if (idxs[i] >= 0) PetscBTSet(btb, idxs[i]);
428:     }
429:     ISRestoreIndices(is, &idxs);
430:     if (fl2g) ISDestroy(&is);
431:   }

433:   /* Count neighs per dof */
434:   ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, &eneighs);
435:   ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, &vneighs);

437:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
438:      for proper detection of coarse edges' endpoints */
439:   PetscBTCreate(ne, &btee);
440:   for (i = 0; i < ne; i++) {
441:     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscBTSet(btee, i);
442:   }
443:   PetscMalloc1(ne, &marks);
444:   if (!conforming) {
445:     MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt);
446:     MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
447:   }
448:   MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
449:   MatSeqAIJGetArray(lGe, &vals);
450:   cum = 0;
451:   for (i = 0; i < ne; i++) {
452:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
453:     if (!PetscBTLookup(btee, i)) {
454:       marks[cum++] = i;
455:       continue;
456:     }
457:     /* set badly connected edge dofs as primal */
458:     if (!conforming) {
459:       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
460:         marks[cum++] = i;
461:         PetscBTSet(bte, i);
462:         for (j = ii[i]; j < ii[i + 1]; j++) PetscBTSet(btv, jj[j]);
463:       } else {
464:         /* every edge dofs should be connected trough a certain number of nodal dofs
465:            to other edge dofs belonging to coarse edges
466:            - at most 2 endpoints
467:            - order-1 interior nodal dofs
468:            - no undefined nodal dofs (nconn < order)
469:         */
470:         PetscInt ends = 0, ints = 0, undef = 0;
471:         for (j = ii[i]; j < ii[i + 1]; j++) {
472:           PetscInt v     = jj[j], k;
473:           PetscInt nconn = iit[v + 1] - iit[v];
474:           for (k = iit[v]; k < iit[v + 1]; k++)
475:             if (!PetscBTLookup(btee, jjt[k])) nconn--;
476:           if (nconn > order) ends++;
477:           else if (nconn == order) ints++;
478:           else undef++;
479:         }
480:         if (undef || ends > 2 || ints != order - 1) {
481:           marks[cum++] = i;
482:           PetscBTSet(bte, i);
483:           for (j = ii[i]; j < ii[i + 1]; j++) PetscBTSet(btv, jj[j]);
484:         }
485:       }
486:     }
487:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
488:     if (!order && ii[i + 1] != ii[i]) {
489:       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
490:       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
491:     }
492:   }
493:   PetscBTDestroy(&btee);
494:   MatSeqAIJRestoreArray(lGe, &vals);
495:   MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
496:   if (!conforming) {
497:     MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
498:     MatDestroy(&lGt);
499:   }
500:   MatZeroRows(lGe, cum, marks, 0., NULL, NULL);

502:   /* identify splitpoints and corner candidates */
503:   MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt);
504:   if (print) {
505:     PetscObjectSetName((PetscObject)lGe, "edgerestr_lG");
506:     MatView(lGe, NULL);
507:     PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt");
508:     MatView(lGt, NULL);
509:   }
510:   MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
511:   MatSeqAIJGetArray(lGt, &vals);
512:   for (i = 0; i < nv; i++) {
513:     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
514:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
515:     if (!order) { /* variable order */
516:       PetscReal vorder = 0.;

518:       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
519:       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
521:       ord = 1;
522:     }
523:     PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord);
524:     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
525:       if (PetscBTLookup(btbd, jj[j])) {
526:         bdir = PETSC_TRUE;
527:         break;
528:       }
529:       if (vc != ecount[jj[j]]) {
530:         sneighs = PETSC_FALSE;
531:       } else {
532:         PetscInt k, *vn = vneighs[i], *en = eneighs[jj[j]];
533:         for (k = 0; k < vc; k++) {
534:           if (vn[k] != en[k]) {
535:             sneighs = PETSC_FALSE;
536:             break;
537:           }
538:         }
539:       }
540:     }
541:     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
542:       if (print) PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]);
543:       PetscBTSet(btv, i);
544:     } else if (test == ord) {
545:       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
546:         if (print) PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i);
547:         PetscBTSet(btv, i);
548:       } else {
549:         if (print) PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i);
550:         PetscBTSet(btvcand, i);
551:       }
552:     }
553:   }
554:   ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs);
555:   ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs);
556:   PetscBTDestroy(&btbd);

558:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
559:   if (order != 1) {
560:     if (print) PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n");
561:     MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
562:     for (i = 0; i < nv; i++) {
563:       if (PetscBTLookup(btvcand, i)) {
564:         PetscBool found = PETSC_FALSE;
565:         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
566:           PetscInt k, e = jj[j];
567:           if (PetscBTLookup(bte, e)) continue;
568:           for (k = iit[e]; k < iit[e + 1]; k++) {
569:             PetscInt v = jjt[k];
570:             if (v != i && PetscBTLookup(btvcand, v)) {
571:               found = PETSC_TRUE;
572:               break;
573:             }
574:           }
575:         }
576:         if (!found) {
577:           if (print) PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i);
578:           PetscBTClear(btvcand, i);
579:         } else {
580:           if (print) PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i);
581:         }
582:       }
583:     }
584:     MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
585:   }
586:   MatSeqAIJRestoreArray(lGt, &vals);
587:   MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
588:   MatDestroy(&lGe);

590:   /* Get the local G^T explicitly */
591:   MatDestroy(&lGt);
592:   MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt);
593:   MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE);

595:   /* Mark interior nodal dofs */
596:   ISLocalToGlobalMappingGetInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared);
597:   PetscBTCreate(nv, &btvi);
598:   for (i = 1; i < n_neigh; i++) {
599:     for (j = 0; j < n_shared[i]; j++) PetscBTSet(btvi, shared[i][j]);
600:   }
601:   ISLocalToGlobalMappingRestoreInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared);

603:   /* communicate corners and splitpoints */
604:   PetscMalloc1(nv, &vmarks);
605:   PetscArrayzero(sfvleaves, nv);
606:   PetscArrayzero(sfvroots, Lv);
607:   for (i = 0; i < nv; i++)
608:     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;

610:   if (print) {
611:     IS tbz;

613:     cum = 0;
614:     for (i = 0; i < nv; i++)
615:       if (sfvleaves[i]) vmarks[cum++] = i;

617:     ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz);
618:     PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local");
619:     ISView(tbz, NULL);
620:     ISDestroy(&tbz);
621:   }

623:   PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM);
624:   PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM);
625:   PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE);
626:   PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE);

628:   /* Zero rows of lGt corresponding to identified corners
629:      and interior nodal dofs */
630:   cum = 0;
631:   for (i = 0; i < nv; i++) {
632:     if (sfvleaves[i]) {
633:       vmarks[cum++] = i;
634:       PetscBTSet(btv, i);
635:     }
636:     if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
637:   }
638:   PetscBTDestroy(&btvi);
639:   if (print) {
640:     IS tbz;

642:     ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz);
643:     PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior");
644:     ISView(tbz, NULL);
645:     ISDestroy(&tbz);
646:   }
647:   MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL);
648:   PetscFree(vmarks);
649:   PetscSFDestroy(&sfv);
650:   PetscFree2(sfvleaves, sfvroots);

652:   /* Recompute G */
653:   MatDestroy(&lG);
654:   MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG);
655:   if (print) {
656:     PetscObjectSetName((PetscObject)lG, "used_lG");
657:     MatView(lG, NULL);
658:     PetscObjectSetName((PetscObject)lGt, "used_lGt");
659:     MatView(lGt, NULL);
660:   }

662:   /* Get primal dofs (if any) */
663:   cum = 0;
664:   for (i = 0; i < ne; i++) {
665:     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
666:   }
667:   if (fl2g) ISLocalToGlobalMappingApply(fl2g, cum, marks, marks);
668:   ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals);
669:   if (print) {
670:     PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs");
671:     ISView(primals, NULL);
672:   }
673:   PetscBTDestroy(&bte);
674:   /* TODO: what if the user passed in some of them ?  */
675:   PCBDDCSetPrimalVerticesLocalIS(pc, primals);
676:   ISDestroy(&primals);

678:   /* Compute edge connectivity */
679:   PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_");

681:   /* Symbolic conn = lG*lGt */
682:   MatProductCreate(lG, lGt, NULL, &conn);
683:   MatProductSetType(conn, MATPRODUCT_AB);
684:   MatProductSetAlgorithm(conn, "default");
685:   MatProductSetFill(conn, PETSC_DEFAULT);
686:   PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_");
687:   MatProductSetFromOptions(conn);
688:   MatProductSymbolic(conn);

690:   MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
691:   if (fl2g) {
692:     PetscBT   btf;
693:     PetscInt *iia, *jja, *iiu, *jju;
694:     PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;

696:     /* create CSR for all local dofs */
697:     PetscMalloc1(n + 1, &iia);
698:     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
700:       iiu = pcbddc->mat_graph->xadj;
701:       jju = pcbddc->mat_graph->adjncy;
702:     } else if (pcbddc->use_local_adj) {
703:       rest = PETSC_TRUE;
704:       MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done);
705:     } else {
706:       free = PETSC_TRUE;
707:       PetscMalloc2(n + 1, &iiu, n, &jju);
708:       iiu[0] = 0;
709:       for (i = 0; i < n; i++) {
710:         iiu[i + 1] = i + 1;
711:         jju[i]     = -1;
712:       }
713:     }

715:     /* import sizes of CSR */
716:     iia[0] = 0;
717:     for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];

719:     /* overwrite entries corresponding to the Nedelec field */
720:     PetscBTCreate(n, &btf);
721:     ISGetIndices(nedfieldlocal, &idxs);
722:     for (i = 0; i < ne; i++) {
723:       PetscBTSet(btf, idxs[i]);
724:       iia[idxs[i] + 1] = ii[i + 1] - ii[i];
725:     }

727:     /* iia in CSR */
728:     for (i = 0; i < n; i++) iia[i + 1] += iia[i];

730:     /* jja in CSR */
731:     PetscMalloc1(iia[n], &jja);
732:     for (i = 0; i < n; i++)
733:       if (!PetscBTLookup(btf, i))
734:         for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];

736:     /* map edge dofs connectivity */
737:     if (jj) {
738:       ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj);
739:       for (i = 0; i < ne; i++) {
740:         PetscInt e = idxs[i];
741:         for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
742:       }
743:     }
744:     ISRestoreIndices(nedfieldlocal, &idxs);
745:     PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER);
746:     if (rest) MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done);
747:     if (free) PetscFree2(iiu, jju);
748:     PetscBTDestroy(&btf);
749:   } else {
750:     PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER);
751:   }

753:   /* Analyze interface for edge dofs */
754:   PCBDDCAnalyzeInterface(pc);
755:   pcbddc->mat_graph->twodim = PETSC_FALSE;

757:   /* Get coarse edges in the edge space */
758:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals);
759:   MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);

761:   if (fl2g) {
762:     ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals);
763:     PetscMalloc1(nee, &eedges);
764:     for (i = 0; i < nee; i++) ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]);
765:   } else {
766:     eedges  = alleedges;
767:     primals = allprimals;
768:   }

770:   /* Mark fine edge dofs with their coarse edge id */
771:   PetscArrayzero(marks, ne);
772:   ISGetLocalSize(primals, &cum);
773:   ISGetIndices(primals, &idxs);
774:   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
775:   ISRestoreIndices(primals, &idxs);
776:   if (print) {
777:     PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs");
778:     ISView(primals, NULL);
779:   }

781:   maxsize = 0;
782:   for (i = 0; i < nee; i++) {
783:     PetscInt size, mark = i + 1;

785:     ISGetLocalSize(eedges[i], &size);
786:     ISGetIndices(eedges[i], &idxs);
787:     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
788:     ISRestoreIndices(eedges[i], &idxs);
789:     maxsize = PetscMax(maxsize, size);
790:   }

792:   /* Find coarse edge endpoints */
793:   MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
794:   MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
795:   for (i = 0; i < nee; i++) {
796:     PetscInt mark = i + 1, size;

798:     ISGetLocalSize(eedges[i], &size);
799:     if (!size && nedfieldlocal) continue;
801:     ISGetIndices(eedges[i], &idxs);
802:     if (print) {
803:       PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i);
804:       ISView(eedges[i], NULL);
805:     }
806:     for (j = 0; j < size; j++) {
807:       PetscInt k, ee = idxs[j];
808:       if (print) PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee);
809:       for (k = ii[ee]; k < ii[ee + 1]; k++) {
810:         if (print) PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]);
811:         if (PetscBTLookup(btv, jj[k])) {
812:           if (print) PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]);
813:         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
814:           PetscInt  k2;
815:           PetscBool corner = PETSC_FALSE;
816:           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
817:             if (print) PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]));
818:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
819:                if the edge dof lie on the natural part of the boundary */
820:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
821:               corner = PETSC_TRUE;
822:               break;
823:             }
824:           }
825:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
826:             if (print) PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]);
827:             PetscBTSet(btv, jj[k]);
828:           } else {
829:             if (print) PetscPrintf(PETSC_COMM_SELF, "        no corners found\n");
830:           }
831:         }
832:       }
833:     }
834:     ISRestoreIndices(eedges[i], &idxs);
835:   }
836:   MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
837:   MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
838:   PetscBTDestroy(&btb);

840:   /* Reset marked primal dofs */
841:   ISGetLocalSize(primals, &cum);
842:   ISGetIndices(primals, &idxs);
843:   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
844:   ISRestoreIndices(primals, &idxs);

846:   /* Now use the initial lG */
847:   MatDestroy(&lG);
848:   MatDestroy(&lGt);
849:   lG = lGinit;
850:   MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt);

852:   /* Compute extended cols indices */
853:   PetscBTCreate(nv, &btvc);
854:   PetscBTCreate(nee, &bter);
855:   MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
856:   MatSeqAIJGetMaxRowNonzeros(lG, &i);
857:   i *= maxsize;
858:   PetscCalloc1(nee, &extcols);
859:   PetscMalloc2(i, &extrow, i, &gidxs);
860:   eerr = PETSC_FALSE;
861:   for (i = 0; i < nee; i++) {
862:     PetscInt size, found = 0;

864:     cum = 0;
865:     ISGetLocalSize(eedges[i], &size);
866:     if (!size && nedfieldlocal) continue;
868:     ISGetIndices(eedges[i], &idxs);
869:     PetscBTMemzero(nv, btvc);
870:     for (j = 0; j < size; j++) {
871:       PetscInt k, ee = idxs[j];
872:       for (k = ii[ee]; k < ii[ee + 1]; k++) {
873:         PetscInt vv = jj[k];
874:         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
875:         else if (!PetscBTLookupSet(btvc, vv)) found++;
876:       }
877:     }
878:     ISRestoreIndices(eedges[i], &idxs);
879:     PetscSortRemoveDupsInt(&cum, extrow);
880:     ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs);
881:     PetscSortIntWithArray(cum, gidxs, extrow);
882:     ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]);
883:     /* it may happen that endpoints are not defined at this point
884:        if it is the case, mark this edge for a second pass */
885:     if (cum != size - 1 || found != 2) {
886:       PetscBTSet(bter, i);
887:       if (print) {
888:         PetscObjectSetName((PetscObject)eedges[i], "error_edge");
889:         ISView(eedges[i], NULL);
890:         PetscObjectSetName((PetscObject)extcols[i], "error_extcol");
891:         ISView(extcols[i], NULL);
892:       }
893:       eerr = PETSC_TRUE;
894:     }
895:   }
897:   MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm);
898:   if (done) {
899:     PetscInt *newprimals;

901:     PetscMalloc1(ne, &newprimals);
902:     ISGetLocalSize(primals, &cum);
903:     ISGetIndices(primals, &idxs);
904:     PetscArraycpy(newprimals, idxs, cum);
905:     ISRestoreIndices(primals, &idxs);
906:     MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
907:     if (print) PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]);
908:     for (i = 0; i < nee; i++) {
909:       PetscBool has_candidates = PETSC_FALSE;
910:       if (PetscBTLookup(bter, i)) {
911:         PetscInt size, mark = i + 1;

913:         ISGetLocalSize(eedges[i], &size);
914:         ISGetIndices(eedges[i], &idxs);
915:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
916:         for (j = 0; j < size; j++) {
917:           PetscInt k, ee = idxs[j];
918:           if (print) PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]);
919:           for (k = ii[ee]; k < ii[ee + 1]; k++) {
920:             /* set all candidates located on the edge as corners */
921:             if (PetscBTLookup(btvcand, jj[k])) {
922:               PetscInt k2, vv = jj[k];
923:               has_candidates = PETSC_TRUE;
924:               if (print) PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv);
925:               PetscBTSet(btv, vv);
926:               /* set all edge dofs connected to candidate as primals */
927:               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
928:                 if (marks[jjt[k2]] == mark) {
929:                   PetscInt k3, ee2 = jjt[k2];
930:                   if (print) PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2);
931:                   newprimals[cum++] = ee2;
932:                   /* finally set the new corners */
933:                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
934:                     if (print) PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]);
935:                     PetscBTSet(btv, jj[k3]);
936:                   }
937:                 }
938:               }
939:             } else {
940:               if (print) PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]);
941:             }
942:           }
943:         }
944:         if (!has_candidates) { /* circular edge */
945:           PetscInt k, ee = idxs[0], *tmarks;

947:           PetscCalloc1(ne, &tmarks);
948:           if (print) PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i);
949:           for (k = ii[ee]; k < ii[ee + 1]; k++) {
950:             PetscInt k2;
951:             if (print) PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]);
952:             PetscBTSet(btv, jj[k]);
953:             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
954:           }
955:           for (j = 0; j < size; j++) {
956:             if (tmarks[idxs[j]] > 1) {
957:               if (print) PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]);
958:               newprimals[cum++] = idxs[j];
959:             }
960:           }
961:           PetscFree(tmarks);
962:         }
963:         ISRestoreIndices(eedges[i], &idxs);
964:       }
965:       ISDestroy(&extcols[i]);
966:     }
967:     PetscFree(extcols);
968:     MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done);
969:     PetscSortRemoveDupsInt(&cum, newprimals);
970:     if (fl2g) {
971:       ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals);
972:       ISDestroy(&primals);
973:       for (i = 0; i < nee; i++) ISDestroy(&eedges[i]);
974:       PetscFree(eedges);
975:     }
976:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals);
977:     ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals);
978:     PetscFree(newprimals);
979:     PCBDDCSetPrimalVerticesLocalIS(pc, primals);
980:     ISDestroy(&primals);
981:     PCBDDCAnalyzeInterface(pc);
982:     pcbddc->mat_graph->twodim = PETSC_FALSE;
983:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals);
984:     if (fl2g) {
985:       ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals);
986:       PetscMalloc1(nee, &eedges);
987:       for (i = 0; i < nee; i++) ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]);
988:     } else {
989:       eedges  = alleedges;
990:       primals = allprimals;
991:     }
992:     PetscCalloc1(nee, &extcols);

994:     /* Mark again */
995:     PetscArrayzero(marks, ne);
996:     for (i = 0; i < nee; i++) {
997:       PetscInt size, mark = i + 1;

999:       ISGetLocalSize(eedges[i], &size);
1000:       ISGetIndices(eedges[i], &idxs);
1001:       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1002:       ISRestoreIndices(eedges[i], &idxs);
1003:     }
1004:     if (print) {
1005:       PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass");
1006:       ISView(primals, NULL);
1007:     }

1009:     /* Recompute extended cols */
1010:     eerr = PETSC_FALSE;
1011:     for (i = 0; i < nee; i++) {
1012:       PetscInt size;

1014:       cum = 0;
1015:       ISGetLocalSize(eedges[i], &size);
1016:       if (!size && nedfieldlocal) continue;
1018:       ISGetIndices(eedges[i], &idxs);
1019:       for (j = 0; j < size; j++) {
1020:         PetscInt k, ee = idxs[j];
1021:         for (k = ii[ee]; k < ii[ee + 1]; k++)
1022:           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1023:       }
1024:       ISRestoreIndices(eedges[i], &idxs);
1025:       PetscSortRemoveDupsInt(&cum, extrow);
1026:       ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs);
1027:       PetscSortIntWithArray(cum, gidxs, extrow);
1028:       ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]);
1029:       if (cum != size - 1) {
1030:         if (print) {
1031:           PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass");
1032:           ISView(eedges[i], NULL);
1033:           PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass");
1034:           ISView(extcols[i], NULL);
1035:         }
1036:         eerr = PETSC_TRUE;
1037:       }
1038:     }
1039:   }
1040:   MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1041:   PetscFree2(extrow, gidxs);
1042:   PetscBTDestroy(&bter);
1043:   if (print) PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF);
1044:   /* an error should not occur at this point */

1047:   /* Check the number of endpoints */
1048:   MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1049:   PetscMalloc1(2 * nee, &corners);
1050:   PetscMalloc1(nee, &cedges);
1051:   for (i = 0; i < nee; i++) {
1052:     PetscInt size, found = 0, gc[2];

1054:     /* init with defaults */
1055:     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1056:     ISGetLocalSize(eedges[i], &size);
1057:     if (!size && nedfieldlocal) continue;
1059:     ISGetIndices(eedges[i], &idxs);
1060:     PetscBTMemzero(nv, btvc);
1061:     for (j = 0; j < size; j++) {
1062:       PetscInt k, ee = idxs[j];
1063:       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1064:         PetscInt vv = jj[k];
1065:         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1067:           corners[i * 2 + found++] = vv;
1068:         }
1069:       }
1070:     }
1071:     if (found != 2) {
1072:       PetscInt e;
1073:       if (fl2g) {
1074:         ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e);
1075:       } else {
1076:         e = idxs[0];
1077:       }
1078:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1079:     }

1081:     /* get primal dof index on this coarse edge */
1082:     ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc);
1083:     if (gc[0] > gc[1]) {
1084:       PetscInt swap      = corners[2 * i];
1085:       corners[2 * i]     = corners[2 * i + 1];
1086:       corners[2 * i + 1] = swap;
1087:     }
1088:     cedges[i] = idxs[size - 1];
1089:     ISRestoreIndices(eedges[i], &idxs);
1090:     if (print) PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]);
1091:   }
1092:   MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1093:   PetscBTDestroy(&btvc);

1095:   if (PetscDefined(USE_DEBUG)) {
1096:     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1097:      not interfere with neighbouring coarse edges */
1098:     PetscMalloc1(nee + 1, &emarks);
1099:     MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1100:     for (i = 0; i < nv; i++) {
1101:       PetscInt emax = 0, eemax = 0;

1103:       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1104:       PetscArrayzero(emarks, nee + 1);
1105:       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1106:       for (j = 1; j < nee + 1; j++) {
1107:         if (emax < emarks[j]) {
1108:           emax  = emarks[j];
1109:           eemax = j;
1110:         }
1111:       }
1112:       /* not relevant for edges */
1113:       if (!eemax) continue;

1115:       for (j = ii[i]; j < ii[i + 1]; j++) {
1117:       }
1118:     }
1119:     PetscFree(emarks);
1120:     MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1121:   }

1123:   /* Compute extended rows indices for edge blocks of the change of basis */
1124:   MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1125:   MatSeqAIJGetMaxRowNonzeros(lGt, &extmem);
1126:   extmem *= maxsize;
1127:   PetscMalloc1(extmem * nee, &extrow);
1128:   PetscMalloc1(nee, &extrows);
1129:   PetscCalloc1(nee, &extrowcum);
1130:   for (i = 0; i < nv; i++) {
1131:     PetscInt mark = 0, size, start;

1133:     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1134:     for (j = ii[i]; j < ii[i + 1]; j++)
1135:       if (marks[jj[j]] && !mark) mark = marks[jj[j]];

1137:     /* not relevant */
1138:     if (!mark) continue;

1140:     /* import extended row */
1141:     mark--;
1142:     start = mark * extmem + extrowcum[mark];
1143:     size  = ii[i + 1] - ii[i];
1145:     PetscArraycpy(extrow + start, jj + ii[i], size);
1146:     extrowcum[mark] += size;
1147:   }
1148:   MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done);
1149:   MatDestroy(&lGt);
1150:   PetscFree(marks);

1152:   /* Compress extrows */
1153:   cum = 0;
1154:   for (i = 0; i < nee; i++) {
1155:     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1156:     PetscSortRemoveDupsInt(&size, start);
1157:     ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]);
1158:     cum = PetscMax(cum, size);
1159:   }
1160:   PetscFree(extrowcum);
1161:   PetscBTDestroy(&btv);
1162:   PetscBTDestroy(&btvcand);

1164:   /* Workspace for lapack inner calls and VecSetValues */
1165:   PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork);

1167:   /* Create change of basis matrix (preallocation can be improved) */
1168:   MatCreate(comm, &T);
1169:   MatSetSizes(T, pc->pmat->rmap->n, pc->pmat->rmap->n, pc->pmat->rmap->N, pc->pmat->rmap->N);
1170:   MatSetType(T, MATAIJ);
1171:   MatSeqAIJSetPreallocation(T, 10, NULL);
1172:   MatMPIAIJSetPreallocation(T, 10, NULL, 10, NULL);
1173:   MatSetLocalToGlobalMapping(T, al2g, al2g);
1174:   MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
1175:   MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE);
1176:   ISLocalToGlobalMappingDestroy(&al2g);

1178:   /* Defaults to identity */
1179:   MatCreateVecs(pc->pmat, &tvec, NULL);
1180:   VecSet(tvec, 1.0);
1181:   MatDiagonalSet(T, tvec, INSERT_VALUES);
1182:   VecDestroy(&tvec);

1184:   /* Create discrete gradient for the coarser level if needed */
1185:   MatDestroy(&pcbddc->nedcG);
1186:   ISDestroy(&pcbddc->nedclocal);
1187:   if (pcbddc->current_level < pcbddc->max_levels) {
1188:     ISLocalToGlobalMapping cel2g, cvl2g;
1189:     IS                     wis, gwis;
1190:     PetscInt               cnv, cne;

1192:     ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis);
1193:     if (fl2g) {
1194:       ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal);
1195:     } else {
1196:       PetscObjectReference((PetscObject)wis);
1197:       pcbddc->nedclocal = wis;
1198:     }
1199:     ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis);
1200:     ISDestroy(&wis);
1201:     ISRenumber(gwis, NULL, &cne, &wis);
1202:     ISLocalToGlobalMappingCreateIS(wis, &cel2g);
1203:     ISDestroy(&wis);
1204:     ISDestroy(&gwis);

1206:     ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis);
1207:     ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis);
1208:     ISDestroy(&wis);
1209:     ISRenumber(gwis, NULL, &cnv, &wis);
1210:     ISLocalToGlobalMappingCreateIS(wis, &cvl2g);
1211:     ISDestroy(&wis);
1212:     ISDestroy(&gwis);

1214:     MatCreate(comm, &pcbddc->nedcG);
1215:     MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv);
1216:     MatSetType(pcbddc->nedcG, MATAIJ);
1217:     MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL);
1218:     MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL);
1219:     MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g);
1220:     ISLocalToGlobalMappingDestroy(&cel2g);
1221:     ISLocalToGlobalMappingDestroy(&cvl2g);
1222:   }
1223:   ISLocalToGlobalMappingDestroy(&vl2g);

1225: #if defined(PRINT_GDET)
1226:   inc = 0;
1227:   lev = pcbddc->current_level;
1228: #endif

1230:   /* Insert values in the change of basis matrix */
1231:   for (i = 0; i < nee; i++) {
1232:     Mat         Gins = NULL, GKins = NULL;
1233:     IS          cornersis = NULL;
1234:     PetscScalar cvals[2];

1236:     if (pcbddc->nedcG) ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis);
1237:     PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork);
1238:     if (Gins && GKins) {
1239:       const PetscScalar *data;
1240:       const PetscInt    *rows, *cols;
1241:       PetscInt           nrh, nch, nrc, ncc;

1243:       ISGetIndices(eedges[i], &cols);
1244:       /* H1 */
1245:       ISGetIndices(extrows[i], &rows);
1246:       MatGetSize(Gins, &nrh, &nch);
1247:       MatDenseGetArrayRead(Gins, &data);
1248:       MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES);
1249:       MatDenseRestoreArrayRead(Gins, &data);
1250:       ISRestoreIndices(extrows[i], &rows);
1251:       /* complement */
1252:       MatGetSize(GKins, &nrc, &ncc);
1256:       MatDenseGetArrayRead(GKins, &data);
1257:       MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES);
1258:       MatDenseRestoreArrayRead(GKins, &data);

1260:       /* coarse discrete gradient */
1261:       if (pcbddc->nedcG) {
1262:         PetscInt cols[2];

1264:         cols[0] = 2 * i;
1265:         cols[1] = 2 * i + 1;
1266:         MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES);
1267:       }
1268:       ISRestoreIndices(eedges[i], &cols);
1269:     }
1270:     ISDestroy(&extrows[i]);
1271:     ISDestroy(&extcols[i]);
1272:     ISDestroy(&cornersis);
1273:     MatDestroy(&Gins);
1274:     MatDestroy(&GKins);
1275:   }
1276:   ISLocalToGlobalMappingDestroy(&el2g);

1278:   /* Start assembling */
1279:   MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY);
1280:   if (pcbddc->nedcG) MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY);

1282:   /* Free */
1283:   if (fl2g) {
1284:     ISDestroy(&primals);
1285:     for (i = 0; i < nee; i++) ISDestroy(&eedges[i]);
1286:     PetscFree(eedges);
1287:   }

1289:   /* hack mat_graph with primal dofs on the coarse edges */
1290:   {
1291:     PCBDDCGraph graph  = pcbddc->mat_graph;
1292:     PetscInt   *oqueue = graph->queue;
1293:     PetscInt   *ocptr  = graph->cptr;
1294:     PetscInt    ncc, *idxs;

1296:     /* find first primal edge */
1297:     if (pcbddc->nedclocal) {
1298:       ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs);
1299:     } else {
1300:       if (fl2g) ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges);
1301:       idxs = cedges;
1302:     }
1303:     cum = 0;
1304:     while (cum < nee && cedges[cum] < 0) cum++;

1306:     /* adapt connected components */
1307:     PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue);
1308:     graph->cptr[0] = 0;
1309:     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1310:       PetscInt lc = ocptr[i + 1] - ocptr[i];
1311:       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1312:         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1313:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1314:         ncc++;
1315:         lc--;
1316:         cum++;
1317:         while (cum < nee && cedges[cum] < 0) cum++;
1318:       }
1319:       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1320:       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1321:       ncc++;
1322:     }
1323:     graph->ncc = ncc;
1324:     if (pcbddc->nedclocal) ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs);
1325:     PetscFree2(ocptr, oqueue);
1326:   }
1327:   ISLocalToGlobalMappingDestroy(&fl2g);
1328:   PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals);
1329:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
1330:   MatDestroy(&conn);

1332:   ISDestroy(&nedfieldlocal);
1333:   PetscFree(extrow);
1334:   PetscFree2(work, rwork);
1335:   PetscFree(corners);
1336:   PetscFree(cedges);
1337:   PetscFree(extrows);
1338:   PetscFree(extcols);
1339:   MatDestroy(&lG);

1341:   /* Complete assembling */
1342:   MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY);
1343:   if (pcbddc->nedcG) {
1344:     MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY);
1345: #if 0
1346:     PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1347:     MatView(pcbddc->nedcG,NULL);
1348: #endif
1349:   }

1351:   /* set change of basis */
1352:   PCBDDCSetChangeOfBasisMat(pc, T, singular);
1353:   MatDestroy(&T);

1355:   return 0;
1356: }

1358: /* the near-null space of BDDC carries information on quadrature weights,
1359:    and these can be collinear -> so cheat with MatNullSpaceCreate
1360:    and create a suitable set of basis vectors first */
1361: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1362: {
1363:   PetscInt i;

1365:   for (i = 0; i < nvecs; i++) {
1366:     PetscInt first, last;

1368:     VecGetOwnershipRange(quad_vecs[i], &first, &last);
1370:     if (i >= first && i < last) {
1371:       PetscScalar *data;
1372:       VecGetArray(quad_vecs[i], &data);
1373:       if (!has_const) {
1374:         data[i - first] = 1.;
1375:       } else {
1376:         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1377:         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1378:       }
1379:       VecRestoreArray(quad_vecs[i], &data);
1380:     }
1381:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1382:   }
1383:   MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp);
1384:   for (i = 0; i < nvecs; i++) { /* reset vectors */
1385:     PetscInt first, last;
1386:     VecLockReadPop(quad_vecs[i]);
1387:     VecGetOwnershipRange(quad_vecs[i], &first, &last);
1388:     if (i >= first && i < last) {
1389:       PetscScalar *data;
1390:       VecGetArray(quad_vecs[i], &data);
1391:       if (!has_const) {
1392:         data[i - first] = 0.;
1393:       } else {
1394:         data[2 * i - first]     = 0.;
1395:         data[2 * i - first + 1] = 0.;
1396:       }
1397:       VecRestoreArray(quad_vecs[i], &data);
1398:     }
1399:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1400:     VecLockReadPush(quad_vecs[i]);
1401:   }
1402:   return 0;
1403: }

1405: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1406: {
1407:   Mat                    loc_divudotp;
1408:   Vec                    p, v, vins, quad_vec, *quad_vecs;
1409:   ISLocalToGlobalMapping map;
1410:   PetscScalar           *vals;
1411:   const PetscScalar     *array;
1412:   PetscInt               i, maxneighs = 0, maxsize, *gidxs;
1413:   PetscInt               n_neigh, *neigh, *n_shared, **shared;
1414:   PetscMPIInt            rank;

1416:   ISLocalToGlobalMappingGetInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared);
1417:   for (i = 0; i < n_neigh; i++) maxneighs = PetscMax(graph->count[shared[i][0]] + 1, maxneighs);
1418:   MPIU_Allreduce(MPI_IN_PLACE, &maxneighs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
1419:   if (!maxneighs) {
1420:     ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared);
1421:     *nnsp = NULL;
1422:     return 0;
1423:   }
1424:   maxsize = 0;
1425:   for (i = 0; i < n_neigh; i++) maxsize = PetscMax(n_shared[i], maxsize);
1426:   PetscMalloc2(maxsize, &gidxs, maxsize, &vals);
1427:   /* create vectors to hold quadrature weights */
1428:   MatCreateVecs(A, &quad_vec, NULL);
1429:   if (!transpose) {
1430:     MatISGetLocalToGlobalMapping(A, &map, NULL);
1431:   } else {
1432:     MatISGetLocalToGlobalMapping(A, NULL, &map);
1433:   }
1434:   VecDuplicateVecs(quad_vec, maxneighs, &quad_vecs);
1435:   VecDestroy(&quad_vec);
1436:   PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, maxneighs, quad_vecs, nnsp);
1437:   for (i = 0; i < maxneighs; i++) VecLockReadPop(quad_vecs[i]);

1439:   /* compute local quad vec */
1440:   MatISGetLocalMat(divudotp, &loc_divudotp);
1441:   if (!transpose) {
1442:     MatCreateVecs(loc_divudotp, &v, &p);
1443:   } else {
1444:     MatCreateVecs(loc_divudotp, &p, &v);
1445:   }
1446:   VecSet(p, 1.);
1447:   if (!transpose) {
1448:     MatMultTranspose(loc_divudotp, p, v);
1449:   } else {
1450:     MatMult(loc_divudotp, p, v);
1451:   }
1452:   if (vl2l) {
1453:     Mat        lA;
1454:     VecScatter sc;

1456:     MatISGetLocalMat(A, &lA);
1457:     MatCreateVecs(lA, &vins, NULL);
1458:     VecScatterCreate(v, NULL, vins, vl2l, &sc);
1459:     VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD);
1460:     VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD);
1461:     VecScatterDestroy(&sc);
1462:   } else {
1463:     vins = v;
1464:   }
1465:   VecGetArrayRead(vins, &array);
1466:   VecDestroy(&p);

1468:   /* insert in global quadrature vecs */
1469:   MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank);
1470:   for (i = 1; i < n_neigh; i++) {
1471:     const PetscInt *idxs;
1472:     PetscInt        idx, nn, j;

1474:     idxs = shared[i];
1475:     nn   = n_shared[i];
1476:     for (j = 0; j < nn; j++) vals[j] = array[idxs[j]];
1477:     PetscFindInt(rank, graph->count[idxs[0]], graph->neighbours_set[idxs[0]], &idx);
1478:     idx = -(idx + 1);
1480:     ISLocalToGlobalMappingApply(map, nn, idxs, gidxs);
1481:     VecSetValues(quad_vecs[idx], nn, gidxs, vals, INSERT_VALUES);
1482:   }
1483:   ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared);
1484:   VecRestoreArrayRead(vins, &array);
1485:   if (vl2l) VecDestroy(&vins);
1486:   VecDestroy(&v);
1487:   PetscFree2(gidxs, vals);

1489:   /* assemble near null space */
1490:   for (i = 0; i < maxneighs; i++) VecAssemblyBegin(quad_vecs[i]);
1491:   for (i = 0; i < maxneighs; i++) {
1492:     VecAssemblyEnd(quad_vecs[i]);
1493:     VecViewFromOptions(quad_vecs[i], NULL, "-pc_bddc_quad_vecs_view");
1494:     VecLockReadPush(quad_vecs[i]);
1495:   }
1496:   VecDestroyVecs(maxneighs, &quad_vecs);
1497:   return 0;
1498: }

1500: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1501: {
1502:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

1504:   if (primalv) {
1505:     if (pcbddc->user_primal_vertices_local) {
1506:       IS list[2], newp;

1508:       list[0] = primalv;
1509:       list[1] = pcbddc->user_primal_vertices_local;
1510:       ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp);
1511:       ISSortRemoveDups(newp);
1512:       ISDestroy(&list[1]);
1513:       pcbddc->user_primal_vertices_local = newp;
1514:     } else {
1515:       PCBDDCSetPrimalVerticesLocalIS(pc, primalv);
1516:     }
1517:   }
1518:   return 0;
1519: }

1521: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1522: {
1523:   PetscInt f, *comp = (PetscInt *)ctx;

1525:   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1526:   return 0;
1527: }

1529: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1530: {
1531:   Vec       local, global;
1532:   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1533:   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1534:   PetscBool monolithic = PETSC_FALSE;

1536:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1537:   PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL);
1538:   PetscOptionsEnd();
1539:   /* need to convert from global to local topology information and remove references to information in global ordering */
1540:   MatCreateVecs(pc->pmat, &global, NULL);
1541:   MatCreateVecs(matis->A, &local, NULL);
1542:   VecBindToCPU(global, PETSC_TRUE);
1543:   VecBindToCPU(local, PETSC_TRUE);
1544:   if (monolithic) { /* just get block size to properly compute vertices */
1545:     if (pcbddc->vertex_size == 1) MatGetBlockSize(pc->pmat, &pcbddc->vertex_size);
1546:     goto boundary;
1547:   }

1549:   if (pcbddc->user_provided_isfordofs) {
1550:     if (pcbddc->n_ISForDofs) {
1551:       PetscInt i;

1553:       PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal);
1554:       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1555:         PetscInt bs;

1557:         PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]);
1558:         ISGetBlockSize(pcbddc->ISForDofs[i], &bs);
1559:         ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs);
1560:         ISDestroy(&pcbddc->ISForDofs[i]);
1561:       }
1562:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1563:       pcbddc->n_ISForDofs      = 0;
1564:       PetscFree(pcbddc->ISForDofs);
1565:     }
1566:   } else {
1567:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1568:       DM dm;

1570:       MatGetDM(pc->pmat, &dm);
1571:       if (!dm) PCGetDM(pc, &dm);
1572:       if (dm) {
1573:         IS      *fields;
1574:         PetscInt nf, i;

1576:         DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL);
1577:         PetscMalloc1(nf, &pcbddc->ISForDofsLocal);
1578:         for (i = 0; i < nf; i++) {
1579:           PetscInt bs;

1581:           PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]);
1582:           ISGetBlockSize(fields[i], &bs);
1583:           ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs);
1584:           ISDestroy(&fields[i]);
1585:         }
1586:         PetscFree(fields);
1587:         pcbddc->n_ISForDofsLocal = nf;
1588:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1589:         PetscContainer c;

1591:         PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c);
1592:         if (c) {
1593:           MatISLocalFields lf;
1594:           PetscContainerGetPointer(c, (void **)&lf);
1595:           PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf);
1596:         } else { /* fallback, create the default fields if bs > 1 */
1597:           PetscInt i, n = matis->A->rmap->n;
1598:           MatGetBlockSize(pc->pmat, &i);
1599:           if (i > 1) {
1600:             pcbddc->n_ISForDofsLocal = i;
1601:             PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal);
1602:             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]);
1603:           }
1604:         }
1605:       }
1606:     } else {
1607:       PetscInt i;
1608:       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]);
1609:     }
1610:   }

1612: boundary:
1613:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1614:     PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal);
1615:   } else if (pcbddc->DirichletBoundariesLocal) {
1616:     PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal);
1617:   }
1618:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1619:     PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal);
1620:   } else if (pcbddc->NeumannBoundariesLocal) {
1621:     PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal);
1622:   }
1623:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local);
1624:   VecDestroy(&global);
1625:   VecDestroy(&local);
1626:   /* detect local disconnected subdomains if requested (use matis->A) */
1627:   if (pcbddc->detect_disconnected) {
1628:     IS        primalv = NULL;
1629:     PetscInt  i;
1630:     PetscBool filter = pcbddc->detect_disconnected_filter;

1632:     for (i = 0; i < pcbddc->n_local_subs; i++) ISDestroy(&pcbddc->local_subs[i]);
1633:     PetscFree(pcbddc->local_subs);
1634:     PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv);
1635:     PCBDDCAddPrimalVerticesLocalIS(pc, primalv);
1636:     ISDestroy(&primalv);
1637:   }
1638:   /* early stage corner detection */
1639:   {
1640:     DM dm;

1642:     MatGetDM(pc->pmat, &dm);
1643:     if (!dm) PCGetDM(pc, &dm);
1644:     if (dm) {
1645:       PetscBool isda;

1647:       PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda);
1648:       if (isda) {
1649:         ISLocalToGlobalMapping l2l;
1650:         IS                     corners;
1651:         Mat                    lA;
1652:         PetscBool              gl, lo;

1654:         {
1655:           Vec                cvec;
1656:           const PetscScalar *coords;
1657:           PetscInt           dof, n, cdim;
1658:           PetscBool          memc = PETSC_TRUE;

1660:           DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL);
1661:           DMGetCoordinates(dm, &cvec);
1662:           VecGetLocalSize(cvec, &n);
1663:           VecGetBlockSize(cvec, &cdim);
1664:           n /= cdim;
1665:           PetscFree(pcbddc->mat_graph->coords);
1666:           PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords);
1667:           VecGetArrayRead(cvec, &coords);
1668: #if defined(PETSC_USE_COMPLEX)
1669:           memc = PETSC_FALSE;
1670: #endif
1671:           if (dof != 1) memc = PETSC_FALSE;
1672:           if (memc) {
1673:             PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof);
1674:           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1675:             PetscReal *bcoords = pcbddc->mat_graph->coords;
1676:             PetscInt   i, b, d;

1678:             for (i = 0; i < n; i++) {
1679:               for (b = 0; b < dof; b++) {
1680:                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1681:               }
1682:             }
1683:           }
1684:           VecRestoreArrayRead(cvec, &coords);
1685:           pcbddc->mat_graph->cdim  = cdim;
1686:           pcbddc->mat_graph->cnloc = dof * n;
1687:           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1688:         }
1689:         DMDAGetSubdomainCornersIS(dm, &corners);
1690:         MatISGetLocalMat(pc->pmat, &lA);
1691:         MatGetLocalToGlobalMapping(lA, &l2l, NULL);
1692:         MatISRestoreLocalMat(pc->pmat, &lA);
1693:         lo = (PetscBool)(l2l && corners);
1694:         MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc));
1695:         if (gl) { /* From PETSc's DMDA */
1696:           const PetscInt *idx;
1697:           PetscInt        dof, bs, *idxout, n;

1699:           DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL);
1700:           ISLocalToGlobalMappingGetBlockSize(l2l, &bs);
1701:           ISGetLocalSize(corners, &n);
1702:           ISGetIndices(corners, &idx);
1703:           if (bs == dof) {
1704:             PetscMalloc1(n, &idxout);
1705:             ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout);
1706:           } else { /* the original DMDA local-to-local map have been modified */
1707:             PetscInt i, d;

1709:             PetscMalloc1(dof * n, &idxout);
1710:             for (i = 0; i < n; i++)
1711:               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1712:             ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout);

1714:             bs = 1;
1715:             n *= dof;
1716:           }
1717:           ISRestoreIndices(corners, &idx);
1718:           DMDARestoreSubdomainCornersIS(dm, &corners);
1719:           ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners);
1720:           PCBDDCAddPrimalVerticesLocalIS(pc, corners);
1721:           ISDestroy(&corners);
1722:           pcbddc->corner_selected  = PETSC_TRUE;
1723:           pcbddc->corner_selection = PETSC_TRUE;
1724:         }
1725:         if (corners) DMDARestoreSubdomainCornersIS(dm, &corners);
1726:       }
1727:     }
1728:   }
1729:   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1730:     DM dm;

1732:     MatGetDM(pc->pmat, &dm);
1733:     if (!dm) PCGetDM(pc, &dm);
1734:     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1735:       Vec          vcoords;
1736:       PetscSection section;
1737:       PetscReal   *coords;
1738:       PetscInt     d, cdim, nl, nf, **ctxs;
1739:       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1740:       /* debug coordinates */
1741:       PetscViewer       viewer;
1742:       PetscBool         flg;
1743:       PetscViewerFormat format;
1744:       const char       *prefix;

1746:       DMGetCoordinateDim(dm, &cdim);
1747:       DMGetLocalSection(dm, &section);
1748:       PetscSectionGetNumFields(section, &nf);
1749:       DMCreateGlobalVector(dm, &vcoords);
1750:       VecGetLocalSize(vcoords, &nl);
1751:       PetscMalloc1(nl * cdim, &coords);
1752:       PetscMalloc2(nf, &funcs, nf, &ctxs);
1753:       PetscMalloc1(nf, &ctxs[0]);
1754:       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1755:       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;

1757:       /* debug coordinates */
1758:       PCGetOptionsPrefix(pc, &prefix);
1759:       PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg);
1760:       if (flg) PetscViewerPushFormat(viewer, format);
1761:       for (d = 0; d < cdim; d++) {
1762:         PetscInt           i;
1763:         const PetscScalar *v;
1764:         char               name[16];

1766:         for (i = 0; i < nf; i++) ctxs[i][0] = d;
1767:         PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d);
1768:         PetscObjectSetName((PetscObject)vcoords, name);
1769:         DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords);
1770:         if (flg) VecView(vcoords, viewer);
1771:         VecGetArrayRead(vcoords, &v);
1772:         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1773:         VecRestoreArrayRead(vcoords, &v);
1774:       }
1775:       VecDestroy(&vcoords);
1776:       PCSetCoordinates(pc, cdim, nl, coords);
1777:       PetscFree(coords);
1778:       PetscFree(ctxs[0]);
1779:       PetscFree2(funcs, ctxs);
1780:       if (flg) {
1781:         PetscViewerPopFormat(viewer);
1782:         PetscViewerDestroy(&viewer);
1783:       }
1784:     }
1785:   }
1786:   return 0;
1787: }

1789: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1790: {
1791:   Mat_IS         *matis = (Mat_IS *)(pc->pmat->data);
1792:   IS              nis;
1793:   const PetscInt *idxs;
1794:   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;

1797:   if (mop == MPI_LAND) {
1798:     /* init rootdata with true */
1799:     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
1800:   } else {
1801:     PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n);
1802:   }
1803:   PetscArrayzero(matis->sf_leafdata, n);
1804:   ISGetLocalSize(*is, &nd);
1805:   ISGetIndices(*is, &idxs);
1806:   for (i = 0; i < nd; i++)
1807:     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
1808:   ISRestoreIndices(*is, &idxs);
1809:   PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop);
1810:   PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop);
1811:   PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
1812:   PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE);
1813:   if (mop == MPI_LAND) {
1814:     PetscMalloc1(nd, &nidxs);
1815:   } else {
1816:     PetscMalloc1(n, &nidxs);
1817:   }
1818:   for (i = 0, nnd = 0; i < n; i++)
1819:     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
1820:   ISCreateGeneral(PetscObjectComm((PetscObject)(*is)), nnd, nidxs, PETSC_OWN_POINTER, &nis);
1821:   ISDestroy(is);
1822:   *is = nis;
1823:   return 0;
1824: }

1826: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
1827: {
1828:   PC_IS   *pcis   = (PC_IS *)(pc->data);
1829:   PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);

1831:   if (!pcbddc->benign_have_null) return 0;
1832:   if (pcbddc->ChangeOfBasisMatrix) {
1833:     Vec swap;

1835:     MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change);
1836:     swap                = pcbddc->work_change;
1837:     pcbddc->work_change = r;
1838:     r                   = swap;
1839:   }
1840:   VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD);
1841:   VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD);
1842:   PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0);
1843:   KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D);
1844:   PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0);
1845:   KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D);
1846:   VecSet(z, 0.);
1847:   VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE);
1848:   VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE);
1849:   if (pcbddc->ChangeOfBasisMatrix) {
1850:     pcbddc->work_change = r;
1851:     VecCopy(z, pcbddc->work_change);
1852:     MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z);
1853:   }
1854:   return 0;
1855: }

1857: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1858: {
1859:   PCBDDCBenignMatMult_ctx ctx;
1860:   PetscBool               apply_right, apply_left, reset_x;

1862:   MatShellGetContext(A, &ctx);
1863:   if (transpose) {
1864:     apply_right = ctx->apply_left;
1865:     apply_left  = ctx->apply_right;
1866:   } else {
1867:     apply_right = ctx->apply_right;
1868:     apply_left  = ctx->apply_left;
1869:   }
1870:   reset_x = PETSC_FALSE;
1871:   if (apply_right) {
1872:     const PetscScalar *ax;
1873:     PetscInt           nl, i;

1875:     VecGetLocalSize(x, &nl);
1876:     VecGetArrayRead(x, &ax);
1877:     PetscArraycpy(ctx->work, ax, nl);
1878:     VecRestoreArrayRead(x, &ax);
1879:     for (i = 0; i < ctx->benign_n; i++) {
1880:       PetscScalar     sum, val;
1881:       const PetscInt *idxs;
1882:       PetscInt        nz, j;
1883:       ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz);
1884:       ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs);
1885:       sum = 0.;
1886:       if (ctx->apply_p0) {
1887:         val = ctx->work[idxs[nz - 1]];
1888:         for (j = 0; j < nz - 1; j++) {
1889:           sum += ctx->work[idxs[j]];
1890:           ctx->work[idxs[j]] += val;
1891:         }
1892:       } else {
1893:         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
1894:       }
1895:       ctx->work[idxs[nz - 1]] -= sum;
1896:       ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs);
1897:     }
1898:     VecPlaceArray(x, ctx->work);
1899:     reset_x = PETSC_TRUE;
1900:   }
1901:   if (transpose) {
1902:     MatMultTranspose(ctx->A, x, y);
1903:   } else {
1904:     MatMult(ctx->A, x, y);
1905:   }
1906:   if (reset_x) VecResetArray(x);
1907:   if (apply_left) {
1908:     PetscScalar *ay;
1909:     PetscInt     i;

1911:     VecGetArray(y, &ay);
1912:     for (i = 0; i < ctx->benign_n; i++) {
1913:       PetscScalar     sum, val;
1914:       const PetscInt *idxs;
1915:       PetscInt        nz, j;
1916:       ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz);
1917:       ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs);
1918:       val = -ay[idxs[nz - 1]];
1919:       if (ctx->apply_p0) {
1920:         sum = 0.;
1921:         for (j = 0; j < nz - 1; j++) {
1922:           sum += ay[idxs[j]];
1923:           ay[idxs[j]] += val;
1924:         }
1925:         ay[idxs[nz - 1]] += sum;
1926:       } else {
1927:         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
1928:         ay[idxs[nz - 1]] = 0.;
1929:       }
1930:       ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs);
1931:     }
1932:     VecRestoreArray(y, &ay);
1933:   }
1934:   return 0;
1935: }

1937: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
1938: {
1939:   PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE);
1940:   return 0;
1941: }

1943: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
1944: {
1945:   PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE);
1946:   return 0;
1947: }

1949: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
1950: {
1951:   PC_IS                  *pcis   = (PC_IS *)pc->data;
1952:   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
1953:   PCBDDCBenignMatMult_ctx ctx;

1955:   if (!restore) {
1956:     Mat                A_IB, A_BI;
1957:     PetscScalar       *work;
1958:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

1961:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return 0;
1962:     PetscMalloc1(pcis->n, &work);
1963:     MatCreate(PETSC_COMM_SELF, &A_IB);
1964:     MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE);
1965:     MatSetType(A_IB, MATSHELL);
1966:     MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private);
1967:     MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private);
1968:     PetscNew(&ctx);
1969:     MatShellSetContext(A_IB, ctx);
1970:     ctx->apply_left  = PETSC_TRUE;
1971:     ctx->apply_right = PETSC_FALSE;
1972:     ctx->apply_p0    = PETSC_FALSE;
1973:     ctx->benign_n    = pcbddc->benign_n;
1974:     if (reuse) {
1975:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
1976:       ctx->free                 = PETSC_FALSE;
1977:     } else { /* TODO: could be optimized for successive solves */
1978:       ISLocalToGlobalMapping N_to_D;
1979:       PetscInt               i;

1981:       ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D);
1982:       PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs);
1983:       for (i = 0; i < pcbddc->benign_n; i++) ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]);
1984:       ISLocalToGlobalMappingDestroy(&N_to_D);
1985:       ctx->free = PETSC_TRUE;
1986:     }
1987:     ctx->A    = pcis->A_IB;
1988:     ctx->work = work;
1989:     MatSetUp(A_IB);
1990:     MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY);
1991:     MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY);
1992:     pcis->A_IB = A_IB;

1994:     /* A_BI as A_IB^T */
1995:     MatCreateTranspose(A_IB, &A_BI);
1996:     pcbddc->benign_original_mat = pcis->A_BI;
1997:     pcis->A_BI                  = A_BI;
1998:   } else {
1999:     if (!pcbddc->benign_original_mat) return 0;
2000:     MatShellGetContext(pcis->A_IB, &ctx);
2001:     MatDestroy(&pcis->A_IB);
2002:     pcis->A_IB = ctx->A;
2003:     ctx->A     = NULL;
2004:     MatDestroy(&pcis->A_BI);
2005:     pcis->A_BI                  = pcbddc->benign_original_mat;
2006:     pcbddc->benign_original_mat = NULL;
2007:     if (ctx->free) {
2008:       PetscInt i;
2009:       for (i = 0; i < ctx->benign_n; i++) ISDestroy(&ctx->benign_zerodiag_subs[i]);
2010:       PetscFree(ctx->benign_zerodiag_subs);
2011:     }
2012:     PetscFree(ctx->work);
2013:     PetscFree(ctx);
2014:   }
2015:   return 0;
2016: }

2018: /* used just in bddc debug mode */
2019: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2020: {
2021:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2022:   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2023:   Mat      An;

2025:   MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An);
2026:   MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL);
2027:   if (is1) {
2028:     MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B);
2029:     MatDestroy(&An);
2030:   } else {
2031:     *B = An;
2032:   }
2033:   return 0;
2034: }

2036: /* TODO: add reuse flag */
2037: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2038: {
2039:   Mat             Bt;
2040:   PetscScalar    *a, *bdata;
2041:   const PetscInt *ii, *ij;
2042:   PetscInt        m, n, i, nnz, *bii, *bij;
2043:   PetscBool       flg_row;

2045:   MatGetSize(A, &n, &m);
2046:   MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row);
2047:   MatSeqAIJGetArray(A, &a);
2048:   nnz = n;
2049:   for (i = 0; i < ii[n]; i++) {
2050:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2051:   }
2052:   PetscMalloc1(n + 1, &bii);
2053:   PetscMalloc1(nnz, &bij);
2054:   PetscMalloc1(nnz, &bdata);
2055:   nnz    = 0;
2056:   bii[0] = 0;
2057:   for (i = 0; i < n; i++) {
2058:     PetscInt j;
2059:     for (j = ii[i]; j < ii[i + 1]; j++) {
2060:       PetscScalar entry = a[j];
2061:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2062:         bij[nnz]   = ij[j];
2063:         bdata[nnz] = entry;
2064:         nnz++;
2065:       }
2066:     }
2067:     bii[i + 1] = nnz;
2068:   }
2069:   MatSeqAIJRestoreArray(A, &a);
2070:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt);
2071:   MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row);
2072:   {
2073:     Mat_SeqAIJ *b = (Mat_SeqAIJ *)(Bt->data);
2074:     b->free_a     = PETSC_TRUE;
2075:     b->free_ij    = PETSC_TRUE;
2076:   }
2077:   if (*B == A) MatDestroy(&A);
2078:   *B = Bt;
2079:   return 0;
2080: }

2082: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2083: {
2084:   Mat                    B = NULL;
2085:   DM                     dm;
2086:   IS                     is_dummy, *cc_n;
2087:   ISLocalToGlobalMapping l2gmap_dummy;
2088:   PCBDDCGraph            graph;
2089:   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2090:   PetscInt               i, n;
2091:   PetscInt              *xadj, *adjncy;
2092:   PetscBool              isplex = PETSC_FALSE;

2094:   if (ncc) *ncc = 0;
2095:   if (cc) *cc = NULL;
2096:   if (primalv) *primalv = NULL;
2097:   PCBDDCGraphCreate(&graph);
2098:   MatGetDM(pc->pmat, &dm);
2099:   if (!dm) PCGetDM(pc, &dm);
2100:   if (dm) PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex);
2101:   if (filter) isplex = PETSC_FALSE;

2103:   if (isplex) { /* this code has been modified from plexpartition.c */
2104:     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2105:     PetscInt       *adj = NULL;
2106:     IS              cellNumbering;
2107:     const PetscInt *cellNum;
2108:     PetscBool       useCone, useClosure;
2109:     PetscSection    section;
2110:     PetscSegBuffer  adjBuffer;
2111:     PetscSF         sfPoint;

2113:     DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2114:     DMGetPointSF(dm, &sfPoint);
2115:     PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2116:     /* Build adjacency graph via a section/segbuffer */
2117:     PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section);
2118:     PetscSectionSetChart(section, pStart, pEnd);
2119:     PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer);
2120:     /* Always use FVM adjacency to create partitioner graph */
2121:     DMGetBasicAdjacency(dm, &useCone, &useClosure);
2122:     DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2123:     DMPlexGetCellNumbering(dm, &cellNumbering);
2124:     ISGetIndices(cellNumbering, &cellNum);
2125:     for (n = 0, p = pStart; p < pEnd; p++) {
2126:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2127:       if (nroots > 0) {
2128:         if (cellNum[p] < 0) continue;
2129:       }
2130:       adjSize = PETSC_DETERMINE;
2131:       DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2132:       for (a = 0; a < adjSize; ++a) {
2133:         const PetscInt point = adj[a];
2134:         if (pStart <= point && point < pEnd) {
2135:           PetscInt *PETSC_RESTRICT pBuf;
2136:           PetscSectionAddDof(section, p, 1);
2137:           PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2138:           *pBuf = point;
2139:         }
2140:       }
2141:       n++;
2142:     }
2143:     DMSetBasicAdjacency(dm, useCone, useClosure);
2144:     /* Derive CSR graph from section/segbuffer */
2145:     PetscSectionSetUp(section);
2146:     PetscSectionGetStorageSize(section, &size);
2147:     PetscMalloc1(n + 1, &xadj);
2148:     for (idx = 0, p = pStart; p < pEnd; p++) {
2149:       if (nroots > 0) {
2150:         if (cellNum[p] < 0) continue;
2151:       }
2152:       PetscSectionGetOffset(section, p, &(xadj[idx++]));
2153:     }
2154:     xadj[n] = size;
2155:     PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2156:     /* Clean up */
2157:     PetscSegBufferDestroy(&adjBuffer);
2158:     PetscSectionDestroy(&section);
2159:     PetscFree(adj);
2160:     graph->xadj   = xadj;
2161:     graph->adjncy = adjncy;
2162:   } else {
2163:     Mat       A;
2164:     PetscBool isseqaij, flg_row;

2166:     MatISGetLocalMat(pc->pmat, &A);
2167:     if (!A->rmap->N || !A->cmap->N) {
2168:       PCBDDCGraphDestroy(&graph);
2169:       return 0;
2170:     }
2171:     PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij);
2172:     if (!isseqaij && filter) {
2173:       PetscBool isseqdense;

2175:       PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense);
2176:       if (!isseqdense) {
2177:         MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B);
2178:       } else { /* TODO: rectangular case and LDA */
2179:         PetscScalar *array;
2180:         PetscReal    chop = 1.e-6;

2182:         MatDuplicate(A, MAT_COPY_VALUES, &B);
2183:         MatDenseGetArray(B, &array);
2184:         MatGetSize(B, &n, NULL);
2185:         for (i = 0; i < n; i++) {
2186:           PetscInt j;
2187:           for (j = i + 1; j < n; j++) {
2188:             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2189:             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2190:             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2191:           }
2192:         }
2193:         MatDenseRestoreArray(B, &array);
2194:         MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B);
2195:       }
2196:     } else {
2197:       PetscObjectReference((PetscObject)A);
2198:       B = A;
2199:     }
2200:     MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row);

2202:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2203:     if (filter) {
2204:       PetscScalar *data;
2205:       PetscInt     j, cum;

2207:       PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered);
2208:       MatSeqAIJGetArray(B, &data);
2209:       cum = 0;
2210:       for (i = 0; i < n; i++) {
2211:         PetscInt t;

2213:         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2214:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2215:           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2216:         }
2217:         t                = xadj_filtered[i];
2218:         xadj_filtered[i] = cum;
2219:         cum += t;
2220:       }
2221:       MatSeqAIJRestoreArray(B, &data);
2222:       graph->xadj   = xadj_filtered;
2223:       graph->adjncy = adjncy_filtered;
2224:     } else {
2225:       graph->xadj   = xadj;
2226:       graph->adjncy = adjncy;
2227:     }
2228:   }
2229:   /* compute local connected components using PCBDDCGraph */
2230:   ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy);
2231:   ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy);
2232:   ISDestroy(&is_dummy);
2233:   PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT);
2234:   ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2235:   PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL);
2236:   PCBDDCGraphComputeConnectedComponents(graph);

2238:   /* partial clean up */
2239:   PetscFree2(xadj_filtered, adjncy_filtered);
2240:   if (B) {
2241:     PetscBool flg_row;
2242:     MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row);
2243:     MatDestroy(&B);
2244:   }
2245:   if (isplex) {
2246:     PetscFree(xadj);
2247:     PetscFree(adjncy);
2248:   }

2250:   /* get back data */
2251:   if (isplex) {
2252:     if (ncc) *ncc = graph->ncc;
2253:     if (cc || primalv) {
2254:       Mat          A;
2255:       PetscBT      btv, btvt;
2256:       PetscSection subSection;
2257:       PetscInt    *ids, cum, cump, *cids, *pids;

2259:       DMPlexGetSubdomainSection(dm, &subSection);
2260:       MatISGetLocalMat(pc->pmat, &A);
2261:       PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids);
2262:       PetscBTCreate(A->rmap->n, &btv);
2263:       PetscBTCreate(A->rmap->n, &btvt);

2265:       cids[0] = 0;
2266:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2267:         PetscInt j;

2269:         PetscBTMemzero(A->rmap->n, btvt);
2270:         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2271:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2273:           DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure);
2274:           for (k = 0; k < 2 * size; k += 2) {
2275:             PetscInt s, pp, p = closure[k], off, dof, cdof;

2277:             PetscSectionGetConstraintDof(subSection, p, &cdof);
2278:             PetscSectionGetOffset(subSection, p, &off);
2279:             PetscSectionGetDof(subSection, p, &dof);
2280:             for (s = 0; s < dof - cdof; s++) {
2281:               if (PetscBTLookupSet(btvt, off + s)) continue;
2282:               if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2283:               else pids[cump++] = off + s; /* cross-vertex */
2284:             }
2285:             DMPlexGetTreeParent(dm, p, &pp, NULL);
2286:             if (pp != p) {
2287:               PetscSectionGetConstraintDof(subSection, pp, &cdof);
2288:               PetscSectionGetOffset(subSection, pp, &off);
2289:               PetscSectionGetDof(subSection, pp, &dof);
2290:               for (s = 0; s < dof - cdof; s++) {
2291:                 if (PetscBTLookupSet(btvt, off + s)) continue;
2292:                 if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2293:                 else pids[cump++] = off + s; /* cross-vertex */
2294:               }
2295:             }
2296:           }
2297:           DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure);
2298:         }
2299:         cids[i + 1] = cum;
2300:         /* mark dofs as already assigned */
2301:         for (j = cids[i]; j < cids[i + 1]; j++) PetscBTSet(btv, ids[j]);
2302:       }
2303:       if (cc) {
2304:         PetscMalloc1(graph->ncc, &cc_n);
2305:         for (i = 0; i < graph->ncc; i++) ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]);
2306:         *cc = cc_n;
2307:       }
2308:       if (primalv) ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv);
2309:       PetscFree3(ids, cids, pids);
2310:       PetscBTDestroy(&btv);
2311:       PetscBTDestroy(&btvt);
2312:     }
2313:   } else {
2314:     if (ncc) *ncc = graph->ncc;
2315:     if (cc) {
2316:       PetscMalloc1(graph->ncc, &cc_n);
2317:       for (i = 0; i < graph->ncc; i++) ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]);
2318:       *cc = cc_n;
2319:     }
2320:   }
2321:   /* clean up graph */
2322:   graph->xadj   = NULL;
2323:   graph->adjncy = NULL;
2324:   PCBDDCGraphDestroy(&graph);
2325:   return 0;
2326: }

/*
   PCBDDCBenignCheck - debugging checks for the benign (saddle-point) setup.

   Input:
     pc       - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
     zerodiag - index set of local zero-diagonal (pressure) dofs, may be NULL

   When zerodiag is given, the routine
     - builds p0 (ones on the zerodiag dofs) in pcis->vec1_N,
     - builds a random v_I in pcis->vec2_N, zeroed on the zerodiag dofs, on the
       interface dofs (pcis->is_B_local) and on the Dirichlet dofs of the graph,
     - computes the dot product of A*p0 with v_I, A being the local matrix of pc->pmat,
     - counts, for every local dof, how many times it appears in pcis->is_B_local.
   It then writes known values through PCBDDCBenignGetOrSetP0 and reads them back.

   NOTE(review): in this listing the statements that consume vals[0], count[] and val
   are not present (listing lines 2377, 2387 and 2401 are skipped); presumably these
   are the error checks -- confirm against the complete source.
*/
2328: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2329: {
2330:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2331:   PC_IS   *pcis   = (PC_IS *)(pc->data);
2332:   IS       dirIS  = NULL;
2333:   PetscInt i;

2335:   PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS);
2336:   if (zerodiag) {
2337:     Mat             A;
2338:     Vec             vec3_N;
2339:     PetscScalar    *vals;
2340:     const PetscInt *idxs;
2341:     PetscInt        nz, *count;

2343:     /* p0: vec1_N is zero everywhere except for ones on the zerodiag dofs */
2344:     VecSet(pcis->vec1_N, 0.);
2345:     PetscMalloc1(pcis->n, &vals);
2346:     ISGetLocalSize(zerodiag, &nz);
2347:     ISGetIndices(zerodiag, &idxs);
2348:     for (i = 0; i < nz; i++) vals[i] = 1.;
2349:     VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES);
2350:     VecAssemblyBegin(pcis->vec1_N);
2351:     VecAssemblyEnd(pcis->vec1_N);
2352:     /* v_I: random vec2_N, zeroed on the zerodiag, interface (is_B_local) and Dirichlet dofs */
2353:     VecSetRandom(pcis->vec2_N, NULL);
2354:     for (i = 0; i < nz; i++) vals[i] = 0.;
2355:     VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES);
2356:     ISRestoreIndices(zerodiag, &idxs);
2357:     ISGetIndices(pcis->is_B_local, &idxs);
2358:     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2359:     VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES);
2360:     ISRestoreIndices(pcis->is_B_local, &idxs);
2361:     if (dirIS) {
2362:       PetscInt n;

2364:       ISGetLocalSize(dirIS, &n);
2365:       ISGetIndices(dirIS, &idxs);
2366:       for (i = 0; i < n; i++) vals[i] = 0.;
2367:       VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES);
2368:       ISRestoreIndices(dirIS, &idxs);
2369:     }
2370:     VecAssemblyBegin(pcis->vec2_N);
2371:     VecAssemblyEnd(pcis->vec2_N);
2372:     VecDuplicate(pcis->vec1_N, &vec3_N);
2373:     VecSet(vec3_N, 0.);
2374:     MatISGetLocalMat(pc->pmat, &A);
2375:     MatMult(A, pcis->vec1_N, vec3_N);
           /* vals[0] receives the dot product of A*p0 with v_I; vals is reused as scratch */
2376:     VecDot(vec3_N, pcis->vec2_N, &vals[0]);
           /* NOTE(review): no statement using vals[0] is visible before the free -- see header */
2378:     PetscFree(vals);
2379:     VecDestroy(&vec3_N);

2381:     /* there should not be any pressure dofs lying on the interface */
2382:     PetscCalloc1(pcis->n, &count);
2383:     ISGetIndices(pcis->is_B_local, &idxs);
2384:     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2385:     ISRestoreIndices(pcis->is_B_local, &idxs);
2386:     ISGetIndices(zerodiag, &idxs);
           /* NOTE(review): the test of count[] on the zerodiag indices is not visible here -- see header */
2388:     ISRestoreIndices(zerodiag, &idxs);
2389:     PetscFree(count);
2390:   }
2391:   ISDestroy(&dirIS);

2393:   /* check PCBDDCBenignGetOrSetP0: write rank-dependent values into the vector (set),
            overwrite the local copy with 1, then read the values back (get) */
2394:   VecSetRandom(pcis->vec1_global, NULL);
2395:   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2396:   PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE);
2397:   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2398:   PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE);
2399:   for (i = 0; i < pcbddc->benign_n; i++) {
2400:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
           /* NOTE(review): the comparison of val with the value written above is not visible here -- see header */
2402:   }
2403:   return 0;
2404: }

/*
   PCBDDCBenignDetectSaddlePoint - detects the local zero-diagonal (pressure) dofs and
   builds the local change of basis on pressures.

   Input:
     pc    - the preconditioner (pc->data is a PC_BDDC, pc->pmat->data a Mat_IS)
     reuse - if true, the detection phase is skipped (jump to project_b0) and only the
             change of basis is rebuilt from the stored pcbddc->benign_n and
             pcbddc->benign_zerodiag_subs

   Output:
     zerodiaglocal - the index set of zero-diagonal dofs; NULL when reuse is true, when
                     the local matrix has no zero diagonal entries, or when no valid
                     set of pressures is found (benign_n == 0).
                     NOTE(review): presumably the caller destroys it -- confirm.

   Fields of pcbddc written here: benign_sf, benign_B0 and benign_zerodiag_subs are
   reset when !reuse; benign_n, benign_zerodiag_subs, benign_null, benign_have_null and
   possibly benign_change_explicit and divudotp are set by the detection phase;
   benign_change, benign_p0_lidx, benign_p0_gidx, benign_p0 (and local_mat, when
   benign_change_explicit is set) are rebuilt after the project_b0 label.
*/
2406: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2407: {
2408:   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2409:   Mat_IS   *matis     = (Mat_IS *)(pc->pmat->data);
2410:   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2411:   PetscInt  nz, n, benign_n, bsp = 1;
2412:   PetscInt *interior_dofs, n_interior_dofs, nneu;
2413:   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;

2415:   if (reuse) goto project_b0;
2416:   PetscSFDestroy(&pcbddc->benign_sf);
2417:   MatDestroy(&pcbddc->benign_B0);
2418:   for (n = 0; n < pcbddc->benign_n; n++) ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2419:   PetscFree(pcbddc->benign_zerodiag_subs);
2420:   has_null_pressures = PETSC_TRUE;
2421:   have_null          = PETSC_TRUE;
2422:   /* if local information on dofs is present, gets pressure dofs from command line (uses the last field if not provided)
2423:      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2424:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2425:      If not, a change of basis on pressures is not needed
2426:      since the local Schur complements are already SPD
2427:   */
2428:   if (pcbddc->n_ISForDofsLocal) {
2429:     IS        iP = NULL;
2430:     PetscInt  p, *pp;
2431:     PetscBool flg;

2433:     PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp);
2434:     n = pcbddc->n_ISForDofsLocal;
2435:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2436:     PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg);
2437:     PetscOptionsEnd();
2438:     if (!flg) {
2439:       n     = 1;
2440:       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2441:     }

           /* first pass: bsp accumulates the block sizes of the selected pressure fields */
2443:     bsp = 0;
2444:     for (p = 0; p < n; p++) {
2445:       PetscInt bs;

2448:       ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs);
2449:       bsp += bs;
2450:     }
2451:     PetscMalloc1(bsp, &bzerodiag);
           /* second pass: one index set per block component of every selected field */
2452:     bsp = 0;
2453:     for (p = 0; p < n; p++) {
2454:       const PetscInt *idxs;
2455:       PetscInt        b, bs, npl, *bidxs;

2457:       ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs);
2458:       ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl);
2459:       ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs);
2460:       PetscMalloc1(npl / bs, &bidxs);
2461:       for (b = 0; b < bs; b++) {
2462:         PetscInt i;

2464:         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2465:         ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]);
2466:         bsp++;
2467:       }
2468:       PetscFree(bidxs);
2469:       ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs);
2470:     }
2471:     ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures);

2473:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2474:     PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP);
2475:     if (iP) {
2476:       IS newpressures;

2478:       ISDifference(pressures, iP, &newpressures);
2479:       ISDestroy(&pressures);
2480:       pressures = newpressures;
2481:     }
2482:     ISSorted(pressures, &sorted);
2483:     if (!sorted) ISSort(pressures);
2484:     PetscFree(pp);
2485:   }

2487:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2488:   MatGetLocalSize(pcbddc->local_mat, &n, NULL);
2489:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2490:   MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag);
2491:   ISSorted(zerodiag, &sorted);
2492:   if (!sorted) ISSort(zerodiag);
         /* zerodiag_save keeps its own reference: zerodiag may be destroyed or replaced below */
2493:   PetscObjectReference((PetscObject)zerodiag);
2494:   zerodiag_save = zerodiag;
2495:   ISGetLocalSize(zerodiag, &nz);
2496:   if (!nz) {
2497:     if (n) have_null = PETSC_FALSE;
2498:     has_null_pressures = PETSC_FALSE;
2499:     ISDestroy(&zerodiag);
2500:   }
2501:   recompute_zerodiag = PETSC_FALSE;

2503:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2504:   zerodiag_subs   = NULL;
2505:   benign_n        = 0;
2506:   n_interior_dofs = 0;
2507:   interior_dofs   = NULL;
2508:   nneu            = 0;
2509:   if (pcbddc->NeumannBoundariesLocal) ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu);
2510:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2511:   if (checkb) { /* need to compute interior nodes */
2512:     PetscInt  n, i, j;
2513:     PetscInt  n_neigh, *neigh, *n_shared, **shared;
2514:     PetscInt *iwork;

2516:     ISLocalToGlobalMappingGetSize(matis->rmapping, &n);
2517:     ISLocalToGlobalMappingGetInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared);
2518:     PetscCalloc1(n, &iwork);
2519:     PetscMalloc1(n, &interior_dofs);
           /* mark every dof listed as shared with neighbors 1..n_neigh-1; unmarked dofs are interior */
2520:     for (i = 1; i < n_neigh; i++)
2521:       for (j = 0; j < n_shared[i]; j++) iwork[shared[i][j]] += 1;
2522:     for (i = 0; i < n; i++)
2523:       if (!iwork[i]) interior_dofs[n_interior_dofs++] = i;
2524:     PetscFree(iwork);
2525:     ISLocalToGlobalMappingRestoreInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared);
2526:   }
2527:   if (has_null_pressures) {
2528:     IS             *subs;
2529:     PetscInt        nsubs, i, j, nl;
2530:     const PetscInt *idxs;
2531:     PetscScalar    *array;
2532:     Vec            *work;

2534:     subs  = pcbddc->local_subs;
2535:     nsubs = pcbddc->n_local_subs;
2536:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2537:     if (checkb) {
2538:       VecDuplicateVecs(matis->y, 2, &work);
2539:       ISGetLocalSize(zerodiag, &nl);
2540:       ISGetIndices(zerodiag, &idxs);
2541:       /* work[0] = 1_p */
2542:       VecSet(work[0], 0.);
2543:       VecGetArray(work[0], &array);
2544:       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2545:       VecRestoreArray(work[0], &array);
2546:       /* work[1] = 1_v */
2547:       VecSet(work[1], 1.);
2548:       VecGetArray(work[1], &array);
2549:       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2550:       VecRestoreArray(work[1], &array);
2551:       ISRestoreIndices(zerodiag, &idxs);
2552:     }

2554:     if (nsubs > 1 || bsp > 1) {
2555:       IS      *is;
2556:       PetscInt b, totb;

2558:       totb  = bsp;
2559:       is    = bsp > 1 ? bzerodiag : &zerodiag;
2560:       nsubs = PetscMax(nsubs, 1);
2561:       PetscCalloc1(nsubs * totb, &zerodiag_subs);
2562:       for (b = 0; b < totb; b++) {
2563:         for (i = 0; i < nsubs; i++) {
2564:           ISLocalToGlobalMapping l2g;
2565:           IS                     t_zerodiag_subs;
2566:           PetscInt               nl;

                 /* l2g maps the numbering of the i-th local subdomain (or of the whole local
                    problem when no subdomains are given) to the local numbering */
2568:           if (subs) {
2569:             ISLocalToGlobalMappingCreateIS(subs[i], &l2g);
2570:           } else {
2571:             IS tis;

2573:             MatGetLocalSize(pcbddc->local_mat, &nl, NULL);
2574:             ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis);
2575:             ISLocalToGlobalMappingCreateIS(tis, &l2g);
2576:             ISDestroy(&tis);
2577:           }
2578:           ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs);
2579:           ISGetLocalSize(t_zerodiag_subs, &nl);
2580:           if (nl) {
2581:             PetscBool valid = PETSC_TRUE;

2583:             if (checkb) {
                     /* NOTE(review): subs[i] is dereferenced here, while the branch above also handles
                        subs == NULL (reachable when bsp > 1 and no local subdomains are given);
                        confirm that this combination cannot occur together with checkb */
2584:               VecSet(matis->x, 0);
2585:               ISGetLocalSize(subs[i], &nl);
2586:               ISGetIndices(subs[i], &idxs);
2587:               VecGetArray(matis->x, &array);
2588:               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2589:               VecRestoreArray(matis->x, &array);
2590:               ISRestoreIndices(subs[i], &idxs);
2591:               VecPointwiseMult(matis->x, work[0], matis->x);
2592:               MatMult(matis->A, matis->x, matis->y);
2593:               VecPointwiseMult(matis->y, work[1], matis->y);
2594:               VecGetArray(matis->y, &array);
2595:               for (j = 0; j < n_interior_dofs; j++) {
2596:                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2597:                   valid = PETSC_FALSE;
2598:                   break;
2599:                 }
2600:               }
2601:               VecRestoreArray(matis->y, &array);
2602:             }
                   /* a subdomain touching the Neumann boundary is not valid */
2603:             if (valid && nneu) {
2604:               const PetscInt *idxs;
2605:               PetscInt        nzb;

2607:               ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs);
2608:               ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL);
2609:               ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs);
2610:               if (nzb) valid = PETSC_FALSE;
2611:             }
2612:             if (valid && pressures) {
2613:               IS       t_pressure_subs, tmp;
2614:               PetscInt i1, i2;

2616:               ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs);
2617:               ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp);
2618:               ISGetLocalSize(tmp, &i1);
2619:               ISGetLocalSize(t_zerodiag_subs, &i2);
2620:               if (i2 != i1) valid = PETSC_FALSE;
2621:               ISDestroy(&t_pressure_subs);
2622:               ISDestroy(&tmp);
2623:             }
2624:             if (valid) {
2625:               ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]);
2626:               benign_n++;
2627:             } else recompute_zerodiag = PETSC_TRUE;
2628:           }
2629:           ISDestroy(&t_zerodiag_subs);
2630:           ISLocalToGlobalMappingDestroy(&l2g);
2631:         }
2632:       }
2633:     } else { /* there's just one subdomain (or zero if they have not been detected) */
2634:       PetscBool valid = PETSC_TRUE;

2636:       if (nneu) valid = PETSC_FALSE;
2637:       if (valid && pressures) ISEqual(pressures, zerodiag, &valid);
2638:       if (valid && checkb) {
2639:         MatMult(matis->A, work[0], matis->x);
2640:         VecPointwiseMult(matis->x, work[1], matis->x);
2641:         VecGetArray(matis->x, &array);
2642:         for (j = 0; j < n_interior_dofs; j++) {
2643:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2644:             valid = PETSC_FALSE;
2645:             break;
2646:           }
2647:         }
2648:         VecRestoreArray(matis->x, &array);
2649:       }
2650:       if (valid) {
2651:         benign_n = 1;
2652:         PetscMalloc1(benign_n, &zerodiag_subs);
2653:         PetscObjectReference((PetscObject)zerodiag);
2654:         zerodiag_subs[0] = zerodiag;
2655:       }
2656:     }
2657:     if (checkb) VecDestroyVecs(2, &work);
2658:   }
2659:   PetscFree(interior_dofs);

2661:   if (!benign_n) {
2662:     PetscInt n;

2664:     ISDestroy(&zerodiag);
2665:     recompute_zerodiag = PETSC_FALSE;
2666:     MatGetLocalSize(pcbddc->local_mat, &n, NULL);
2667:     if (n) have_null = PETSC_FALSE;
2668:   }

2670:   /* final check for null pressures */
2671:   if (zerodiag && pressures) ISEqual(pressures, zerodiag, &have_null);

         /* some subsets were rejected: rebuild zerodiag as the sorted union of the accepted ones */
2673:   if (recompute_zerodiag) {
2674:     ISDestroy(&zerodiag);
2675:     if (benign_n == 1) {
2676:       PetscObjectReference((PetscObject)zerodiag_subs[0]);
2677:       zerodiag = zerodiag_subs[0];
2678:     } else {
2679:       PetscInt i, nzn, *new_idxs;

2681:       nzn = 0;
2682:       for (i = 0; i < benign_n; i++) {
2683:         PetscInt ns;
2684:         ISGetLocalSize(zerodiag_subs[i], &ns);
2685:         nzn += ns;
2686:       }
2687:       PetscMalloc1(nzn, &new_idxs);
2688:       nzn = 0;
2689:       for (i = 0; i < benign_n; i++) {
2690:         PetscInt ns, *idxs;
2691:         ISGetLocalSize(zerodiag_subs[i], &ns);
2692:         ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs);
2693:         PetscArraycpy(new_idxs + nzn, idxs, ns);
2694:         ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs);
2695:         nzn += ns;
2696:       }
2697:       PetscSortInt(nzn, new_idxs);
2698:       ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag);
2699:     }
2700:     have_null = PETSC_FALSE;
2701:   }

2703:   /* determines if the coarse solver will be singular or not */
2704:   MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc));

2706:   /* Prepare matrix to compute no-net-flux */
2707:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2708:     Mat                    A, loc_divudotp;
2709:     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2710:     IS                     row, col, isused = NULL;
2711:     PetscInt               M, N, n, st, n_isused;

2713:     if (pressures) {
2714:       isused = pressures;
2715:     } else {
2716:       isused = zerodiag_save;
2717:     }
2718:     MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL);
2719:     MatISGetLocalMat(pc->pmat, &A);
2720:     MatGetLocalSize(A, &n, NULL);
2722:     n_isused = 0;
2723:     if (isused) ISGetLocalSize(isused, &n_isused);
           /* inclusive scan minus the local count gives the first global row owned by this process */
2724:     MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));
2725:     st = st - n_isused;
2726:     if (n) {
2727:       const PetscInt *gidxs;

2729:       MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp);
2730:       ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs);
2731:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2732:       ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row);
2733:       ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col);
2734:       ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs);
2735:     } else {
2736:       MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp);
2737:       ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row);
2738:       ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col);
2739:     }
2740:     MatGetSize(pc->pmat, NULL, &N);
2741:     ISGetSize(row, &M);
2742:     ISLocalToGlobalMappingCreateIS(row, &rl2g);
2743:     ISLocalToGlobalMappingCreateIS(col, &cl2g);
2744:     ISDestroy(&row);
2745:     ISDestroy(&col);
2746:     MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp);
2747:     MatSetType(pcbddc->divudotp, MATIS);
2748:     MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N);
2749:     MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g);
2750:     ISLocalToGlobalMappingDestroy(&rl2g);
2751:     ISLocalToGlobalMappingDestroy(&cl2g);
2752:     MatISSetLocalMat(pcbddc->divudotp, loc_divudotp);
2753:     MatDestroy(&loc_divudotp);
2754:     MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY);
2755:     MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY);
2756:   }
2757:   ISDestroy(&zerodiag_save);
2758:   ISDestroy(&pressures);
2759:   if (bzerodiag) {
2760:     PetscInt i;

2762:     for (i = 0; i < bsp; i++) ISDestroy(&bzerodiag[i]);
2763:     PetscFree(bzerodiag);
2764:   }
2765:   pcbddc->benign_n             = benign_n;
2766:   pcbddc->benign_zerodiag_subs = zerodiag_subs;

2768:   /* determines if the problem has subdomains with 0 pressure block */
2769:   have_null = (PetscBool)(!!pcbddc->benign_n);
2770:   MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));

2772: project_b0:
2773:   MatGetLocalSize(pcbddc->local_mat, &n, NULL);
2774:   /* change of basis and p0 dofs */
2775:   if (pcbddc->benign_n) {
2776:     PetscInt i, s, *nnz;

2778:     /* local change of basis for pressures */
2779:     MatDestroy(&pcbddc->benign_change);
2780:     MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change);
2781:     MatSetType(pcbddc->benign_change, MATAIJ);
2782:     MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE);
2783:     PetscMalloc1(n, &nnz);
2784:     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
2785:     for (i = 0; i < pcbddc->benign_n; i++) {
2786:       const PetscInt *idxs;
2787:       PetscInt        nzs, j;

2789:       ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs);
2790:       ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs);
2791:       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
2792:       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
2793:       ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs);
2794:     }
2795:     MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz);
2796:     MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE);
2797:     PetscFree(nnz);
2798:     /* set identity by default */
2799:     for (i = 0; i < n; i++) MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES);
2800:     PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0);
2801:     PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0);
2802:     /* set change on pressures */
2803:     for (s = 0; s < pcbddc->benign_n; s++) {
2804:       PetscScalar    *array;
2805:       const PetscInt *idxs;
2806:       PetscInt        nzs;

2808:       ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs);
2809:       ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs);
             /* rows of all but the last pressure dof: 1 on the diagonal and 1 in the column of the last dof */
2810:       for (i = 0; i < nzs - 1; i++) {
2811:         PetscScalar vals[2];
2812:         PetscInt    cols[2];

2814:         cols[0] = idxs[i];
2815:         cols[1] = idxs[nzs - 1];
2816:         vals[0] = 1.;
2817:         vals[1] = 1.;
2818:         MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES);
2819:       }
             /* row of the last pressure dof: -1 on the other pressure dofs of the subset, 1 on the diagonal */
2820:       PetscMalloc1(nzs, &array);
2821:       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
2822:       array[nzs - 1] = 1.;
2823:       MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES);
2824:       /* store local idxs for p0 */
2825:       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
2826:       ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs);
2827:       PetscFree(array);
2828:     }
2829:     MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY);
2830:     MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY);

2832:     /* project if needed */
2833:     if (pcbddc->benign_change_explicit) {
2834:       Mat M;

2836:       MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M);
2837:       MatDestroy(&pcbddc->local_mat);
2838:       MatSeqAIJCompress(M, &pcbddc->local_mat);
2839:       MatDestroy(&M);
2840:     }
2841:     /* store global idxs for p0 */
2842:     ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx);
2843:   }
2844:   *zerodiaglocal = zerodiag;
2845:   return 0;
2846: }

/*
   PCBDDCBenignGetOrSetP0 - moves the p0 values between a vector and pcbddc->benign_p0.

   Input:
     pc  - the preconditioner
     v   - the vector; its layout is assumed to match pc->pmat->rmap, which is the layout
           used to build the star forest -- TODO confirm against callers
     get - if true, entries of v at the indices pcbddc->benign_p0_gidx are copied into
           pcbddc->benign_p0; if false, pcbddc->benign_p0 is written into those entries of v

   The star forest pcbddc->benign_sf is created on first use, with pcbddc->benign_n leaves.
*/
2848: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2849: {
2850:   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
2851:   PetscScalar *array;

2853:   if (!pcbddc->benign_sf) {
2854:     PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf);
2855:     PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx);
2856:   }
2857:   if (get) {
           /* v is only read: the const qualifier is cast away to reuse the same array pointer */
2858:     VecGetArrayRead(v, (const PetscScalar **)&array);
2859:     PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE);
2860:     PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE);
2861:     VecRestoreArrayRead(v, (const PetscScalar **)&array);
2862:   } else {
2863:     VecGetArray(v, &array);
2864:     PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE);
2865:     PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE);
2866:     VecRestoreArray(v, &array);
2867:   }
2868:   return 0;
2869: }

/*
   PCBDDCBenignPopOrPushB0 - extracts (pop) or reinserts (push) the rows associated with the
   p0 dofs, stored in pcbddc->benign_B0.

   Input:
     pc  - the preconditioner
     pop - if true B0 is extracted, otherwise it is pushed back into pcbddc->local_mat

   Does nothing when pcbddc->benign_n is zero.

   Pop, with pcbddc->benign_change_explicit set: the rows pcbddc->benign_p0_lidx of
   pcbddc->local_mat are copied into pcbddc->benign_B0 (reused if already present), then the
   same rows and columns of pcbddc->local_mat are zeroed with 1.0 placed on the diagonal.

   Pop, otherwise: row i of pcbddc->benign_B0 is filled with the entries of
   A * 1_{benign_zerodiag_subs[i]} (A being the local matrix of pc->pmat) whose magnitude
   exceeds PETSC_SMALL.

   Push: supported only with pcbddc->benign_change_explicit; each row of B0 is inserted in
   pcbddc->local_mat as a row and, with the same values, as a column at the corresponding
   p0 index, with a zero on the diagonal. Otherwise an error is raised.
*/
2871: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2872: {
2873:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

2875:   /* TODO: add error checking
2876:     - avoid nested pop (or push) calls.
2877:     - cannot push before pop.
2878:     - cannot call this if pcbddc->local_mat is NULL
2879:   */
2880:   if (!pcbddc->benign_n) return 0;
2881:   if (pop) {
2882:     if (pcbddc->benign_change_explicit) {
2883:       IS       is_p0;
2884:       MatReuse reuse;

2886:       /* extract B_0 */
2887:       reuse = MAT_INITIAL_MATRIX;
2888:       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
2889:       ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0);
2890:       MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0);
2891:       /* remove rows and cols from local problem */
2892:       MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE);
2893:       MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE);
2894:       MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL);
2895:       ISDestroy(&is_p0);
2896:     } else {
2897:       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
2898:       PetscScalar *vals;
2899:       PetscInt     i, n, *idxs_ins;

2901:       VecGetLocalSize(matis->y, &n);
2902:       PetscMalloc2(n, &idxs_ins, n, &vals);
2903:       if (!pcbddc->benign_B0) {
2904:         PetscInt *nnz;
2905:         MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0);
2906:         MatSetType(pcbddc->benign_B0, MATAIJ);
2907:         MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE);
2908:         PetscMalloc1(pcbddc->benign_n, &nnz);
               /* preallocate row i with n minus the size of the i-th zero-diagonal subset */
2909:         for (i = 0; i < pcbddc->benign_n; i++) {
2910:           ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]);
2911:           nnz[i] = n - nnz[i];
2912:         }
2913:         MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz);
2914:         MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE);
2915:         PetscFree(nnz);
2916:       }

2918:       for (i = 0; i < pcbddc->benign_n; i++) {
2919:         PetscScalar *array;
2920:         PetscInt    *idxs, j, nz, cum;

               /* matis->x = indicator of the i-th zero-diagonal subset, matis->y = A * matis->x */
2922:         VecSet(matis->x, 0.);
2923:         ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz);
2924:         ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs);
2925:         for (j = 0; j < nz; j++) vals[j] = 1.;
2926:         VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES);
2927:         VecAssemblyBegin(matis->x);
2928:         VecAssemblyEnd(matis->x);
2929:         VecSet(matis->y, 0.);
2930:         MatMult(matis->A, matis->x, matis->y);
2931:         VecGetArray(matis->y, &array);
               /* keep only the entries of matis->y above PETSC_SMALL in magnitude; vals is reused as output buffer */
2932:         cum = 0;
2933:         for (j = 0; j < n; j++) {
2934:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
2935:             vals[cum]     = array[j];
2936:             idxs_ins[cum] = j;
2937:             cum++;
2938:           }
2939:         }
2940:         MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES);
2941:         VecRestoreArray(matis->y, &array);
2942:         ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs);
2943:       }
2944:       MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY);
2945:       MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY);
2946:       PetscFree2(idxs_ins, vals);
2947:     }
2948:   } else { /* push */
2949:     if (pcbddc->benign_change_explicit) {
2950:       PetscInt i;

2952:       for (i = 0; i < pcbddc->benign_n; i++) {
2953:         PetscScalar *B0_vals;
2954:         PetscInt    *B0_cols, B0_ncol;

2956:         MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals);
               /* insert the row, then the same values as a column, then a zero on the diagonal */
2957:         MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES);
2958:         MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES);
2959:         MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES);
2960:         MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals);
2961:       }
2962:       MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY);
2963:       MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY);
2964:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
2965:   }
2966:   return 0;
2967: }

2969: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
2970: {
2971:   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
2972:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2973:   PetscBLASInt    B_dummyint, B_neigs, B_ierr, B_lwork;
2974:   PetscBLASInt   *B_iwork, *B_ifail;
2975:   PetscScalar    *work, lwork;
2976:   PetscScalar    *St, *S, *eigv;
2977:   PetscScalar    *Sarray, *Starray;
2978:   PetscReal      *eigs, thresh, lthresh, uthresh;
2979:   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
2980:   PetscBool       allocated_S_St, upart;
2981: #if defined(PETSC_USE_COMPLEX)
2982:   PetscReal *rwork;
2983: #endif

2985:   if (!pcbddc->adaptive_selection) return 0;
2989:              sub_schurs->is_posdef);
2990:   PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0);

2992:   if (pcbddc->dbg_flag) {
2993:     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
2994:     PetscViewerFlush(pcbddc->dbg_viewer);
2995:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
2996:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n");
2997:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
2998:   }

3000:   if (pcbddc->dbg_flag) PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef);

3002:   /* max size of subsets */
3003:   mss = 0;
3004:   for (i = 0; i < sub_schurs->n_subs; i++) {
3005:     PetscInt subset_size;

3007:     ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
3008:     mss = PetscMax(mss, subset_size);
3009:   }

3011:   /* min/max and threshold */
3012:   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3013:   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3014:   nmax           = PetscMax(nmin, nmax);
3015:   allocated_S_St = PETSC_FALSE;
3016:   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3017:     allocated_S_St = PETSC_TRUE;
3018:   }

3020:   /* allocate lapack workspace */
3021:   cum = cum2 = 0;
3022:   maxneigs   = 0;
3023:   for (i = 0; i < sub_schurs->n_subs; i++) {
3024:     PetscInt n, subset_size;

3026:     ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
3027:     n = PetscMin(subset_size, nmax);
3028:     cum += subset_size;
3029:     cum2 += subset_size * n;
3030:     maxneigs = PetscMax(maxneigs, n);
3031:   }
3032:   lwork = 0;
3033:   if (mss) {
3034:     if (sub_schurs->is_symmetric) {
3035:       PetscScalar  sdummy  = 0.;
3036:       PetscBLASInt B_itype = 1;
3037:       PetscBLASInt B_N = mss, idummy = 0;
3038:       PetscReal    rdummy = 0., zero = 0.0;
3039:       PetscReal    eps = 0.0; /* dlamch? */

3041:       B_lwork = -1;
3042:       /* some implementations may complain about NULL pointers, even if we are querying */
3043:       S       = &sdummy;
3044:       St      = &sdummy;
3045:       eigs    = &rdummy;
3046:       eigv    = &sdummy;
3047:       B_iwork = &idummy;
3048:       B_ifail = &idummy;
3049: #if defined(PETSC_USE_COMPLEX)
3050:       rwork = &rdummy;
3051: #endif
3052:       thresh = 1.0;
3053:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3054: #if defined(PETSC_USE_COMPLEX)
3055:       PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3056: #else
3057:       PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3058: #endif
3060:       PetscFPTrapPop();
3061:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3062:   }

3064:   nv = 0;
3065:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3066:     ISGetLocalSize(sub_schurs->is_vertices, &nv);
3067:   }
3068:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork);
3069:   if (allocated_S_St) PetscMalloc2(mss * mss, &S, mss * mss, &St);
3070:   PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail);
3071: #if defined(PETSC_USE_COMPLEX)
3072:   PetscMalloc1(7 * mss, &rwork);
3073: #endif
3074:   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3075:                          &pcbddc->adaptive_constraints_data));
3076:   PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs);

3078:   maxneigs = 0;
3079:   cum = cumarray                           = 0;
3080:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3081:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3082:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3083:     const PetscInt *idxs;

3085:     ISGetIndices(sub_schurs->is_vertices, &idxs);
3086:     for (cum = 0; cum < nv; cum++) {
3087:       pcbddc->adaptive_constraints_n[cum]            = 1;
3088:       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3089:       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3090:       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3091:       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3092:     }
3093:     ISRestoreIndices(sub_schurs->is_vertices, &idxs);
3094:   }

3096:   if (mss) { /* multilevel */
3097:     if (sub_schurs->gdsw) {
3098:       MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray);
3099:       MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray);
3100:     } else {
3101:       MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray);
3102:       MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray);
3103:     }
3104:   }

3106:   lthresh = pcbddc->adaptive_threshold[0];
3107:   uthresh = pcbddc->adaptive_threshold[1];
3108:   upart   = pcbddc->use_deluxe_scaling;
3109:   for (i = 0; i < sub_schurs->n_subs; i++) {
3110:     const PetscInt *idxs;
3111:     PetscReal       upper, lower;
3112:     PetscInt        j, subset_size, eigs_start = 0;
3113:     PetscBLASInt    B_N;
3114:     PetscBool       same_data = PETSC_FALSE;
3115:     PetscBool       scal      = PETSC_FALSE;

3117:     if (upart) {
3118:       upper = PETSC_MAX_REAL;
3119:       lower = uthresh;
3120:     } else {
3121:       if (sub_schurs->gdsw) {
3122:         upper = uthresh;
3123:         lower = PETSC_MIN_REAL;
3124:       } else {
3126:         upper = 1. / uthresh;
3127:         lower = 0.;
3128:       }
3129:     }
3130:     ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
3131:     ISGetIndices(sub_schurs->is_subs[i], &idxs);
3132:     PetscBLASIntCast(subset_size, &B_N);
3133:     /* this is experimental: we assume the dofs have been properly grouped to have
3134:        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3135:     if (!sub_schurs->is_posdef) {
3136:       Mat T;

3138:       for (j = 0; j < subset_size; j++) {
3139:         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3140:           MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T);
3141:           MatScale(T, -1.0);
3142:           MatDestroy(&T);
3143:           MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T);
3144:           MatScale(T, -1.0);
3145:           MatDestroy(&T);
3146:           if (sub_schurs->change_primal_sub) {
3147:             PetscInt        nz, k;
3148:             const PetscInt *idxs;

3150:             ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz);
3151:             ISGetIndices(sub_schurs->change_primal_sub[i], &idxs);
3152:             for (k = 0; k < nz; k++) {
3153:               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3154:               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3155:             }
3156:             ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs);
3157:           }
3158:           scal = PETSC_TRUE;
3159:           break;
3160:         }
3161:       }
3162:     }

3164:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3165:       if (sub_schurs->is_symmetric) {
3166:         PetscInt j, k;
3167:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3168:           PetscArrayzero(S, subset_size * subset_size);
3169:           PetscArrayzero(St, subset_size * subset_size);
3170:         }
3171:         for (j = 0; j < subset_size; j++) {
3172:           for (k = j; k < subset_size; k++) {
3173:             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3174:             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3175:           }
3176:         }
3177:       } else {
3178:         PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3179:         PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3180:       }
3181:     } else {
3182:       S  = Sarray + cumarray;
3183:       St = Starray + cumarray;
3184:     }
3185:     /* see if we can save some work */
3186:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscArraycmp(S, St, subset_size * subset_size, &same_data);

3188:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3189:       B_neigs = 0;
3190:     } else {
3191:       if (sub_schurs->is_symmetric) {
3192:         PetscBLASInt B_itype = 1;
3193:         PetscBLASInt B_IL, B_IU;
3194:         PetscReal    eps = -1.0; /* dlamch? */
3195:         PetscInt     nmin_s;
3196:         PetscBool    compute_range;

3198:         B_neigs       = 0;
3199:         compute_range = (PetscBool)!same_data;
3200:         if (nmin >= subset_size) compute_range = PETSC_FALSE;

3202:         if (pcbddc->dbg_flag) {
3203:           PetscInt nc = 0;

3205:           if (sub_schurs->change_primal_sub) ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc);
3206:           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i,
3207:                                                        sub_schurs->n_subs, subset_size, pcbddc->mat_graph->count[idxs[0]] + 1, pcbddc->mat_graph->which_dof[idxs[0]], compute_range, nc));
3208:         }

3210:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3211:         if (compute_range) {
3212:           /* ask for eigenvalues larger than thresh */
3213:           if (sub_schurs->is_posdef) {
3214: #if defined(PETSC_USE_COMPLEX)
3215:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3216: #else
3217:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3218: #endif
3219:             PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3220:           } else { /* no theory so far, but it works nicely */
3221:             PetscInt  recipe = 0, recipe_m = 1;
3222:             PetscReal bb[2];

3224:             PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL);
3225:             switch (recipe) {
3226:             case 0:
3227:               if (scal) {
3228:                 bb[0] = PETSC_MIN_REAL;
3229:                 bb[1] = lthresh;
3230:               } else {
3231:                 bb[0] = uthresh;
3232:                 bb[1] = PETSC_MAX_REAL;
3233:               }
3234: #if defined(PETSC_USE_COMPLEX)
3235:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3236: #else
3237:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3238: #endif
3239:               PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3240:               break;
3241:             case 1:
3242:               bb[0] = PETSC_MIN_REAL;
3243:               bb[1] = lthresh * lthresh;
3244: #if defined(PETSC_USE_COMPLEX)
3245:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3246: #else
3247:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3248: #endif
3249:               PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3250:               if (!scal) {
3251:                 PetscBLASInt B_neigs2 = 0;

3253:                 bb[0] = PetscMax(lthresh * lthresh, uthresh);
3254:                 bb[1] = PETSC_MAX_REAL;
3255:                 PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3256:                 PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3257: #if defined(PETSC_USE_COMPLEX)
3258:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3259: #else
3260:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3261: #endif
3262:                 PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3263:                 B_neigs += B_neigs2;
3264:               }
3265:               break;
3266:             case 2:
3267:               if (scal) {
3268:                 bb[0] = PETSC_MIN_REAL;
3269:                 bb[1] = 0;
3270: #if defined(PETSC_USE_COMPLEX)
3271:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3272: #else
3273:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3274: #endif
3275:                 PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3276:               } else {
3277:                 PetscBLASInt B_neigs2 = 0;
3278:                 PetscBool    import   = PETSC_FALSE;

3280:                 lthresh = PetscMax(lthresh, 0.0);
3281:                 if (lthresh > 0.0) {
3282:                   bb[0] = PETSC_MIN_REAL;
3283:                   bb[1] = lthresh * lthresh;

3285:                   import = PETSC_TRUE;
3286: #if defined(PETSC_USE_COMPLEX)
3287:                   PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3288: #else
3289:                   PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3290: #endif
3291:                   PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3292:                 }
3293:                 bb[0] = PetscMax(lthresh * lthresh, uthresh);
3294:                 bb[1] = PETSC_MAX_REAL;
3295:                 if (import) {
3296:                   PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3297:                   PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3298:                 }
3299: #if defined(PETSC_USE_COMPLEX)
3300:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3301: #else
3302:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3303: #endif
3304:                 PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3305:                 B_neigs += B_neigs2;
3306:               }
3307:               break;
3308:             case 3:
3309:               if (scal) {
3310:                 PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL);
3311:               } else {
3312:                 PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL);
3313:               }
3314:               if (!scal) {
3315:                 bb[0] = uthresh;
3316:                 bb[1] = PETSC_MAX_REAL;
3317: #if defined(PETSC_USE_COMPLEX)
3318:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3319: #else
3320:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3321: #endif
3322:                 PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3323:               }
3324:               if (recipe_m > 0 && B_N - B_neigs > 0) {
3325:                 PetscBLASInt B_neigs2 = 0;

3327:                 B_IL = 1;
3328:                 PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU);
3329:                 PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3330:                 PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3331: #if defined(PETSC_USE_COMPLEX)
3332:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3333: #else
3334:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3335: #endif
3336:                 PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3337:                 B_neigs += B_neigs2;
3338:               }
3339:               break;
3340:             case 4:
3341:               bb[0] = PETSC_MIN_REAL;
3342:               bb[1] = lthresh;
3343: #if defined(PETSC_USE_COMPLEX)
3344:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3345: #else
3346:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3347: #endif
3348:               PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3349:               {
3350:                 PetscBLASInt B_neigs2 = 0;

3352:                 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3353:                 bb[1] = PETSC_MAX_REAL;
3354:                 PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3355:                 PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3356: #if defined(PETSC_USE_COMPLEX)
3357:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3358: #else
3359:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3360: #endif
3361:                 PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3362:                 B_neigs += B_neigs2;
3363:               }
3364:               break;
3365:             case 5: /* same as before: first compute all eigenvalues, then filter */
3366: #if defined(PETSC_USE_COMPLEX)
3367:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3368: #else
3369:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3370: #endif
3371:               PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3372:               {
3373:                 PetscInt e, k, ne;
3374:                 for (e = 0, ne = 0; e < B_neigs; e++) {
3375:                   if (eigs[e] < lthresh || eigs[e] > uthresh) {
3376:                     for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3377:                     eigs[ne] = eigs[e];
3378:                     ne++;
3379:                   }
3380:                 }
3381:                 PetscArraycpy(eigv, S, B_N * ne);
3382:                 B_neigs = ne;
3383:               }
3384:               break;
3385:             default:
3386:               SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3387:             }
3388:           }
3389:         } else if (!same_data) { /* this is just to see all the eigenvalues */
3390:           B_IU = PetscMax(1, PetscMin(B_N, nmax));
3391:           B_IL = 1;
3392: #if defined(PETSC_USE_COMPLEX)
3393:           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3394: #else
3395:           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3396: #endif
3397:           PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3398:         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3399:           PetscInt k;
3401:           ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax);
3402:           PetscBLASIntCast(nmax, &B_neigs);
3403:           nmin = nmax;
3404:           PetscArrayzero(eigv, subset_size * nmax);
3405:           for (k = 0; k < nmax; k++) {
3406:             eigs[k]                     = 1. / PETSC_SMALL;
3407:             eigv[k * (subset_size + 1)] = 1.0;
3408:           }
3409:         }
3410:         PetscFPTrapPop();
3411:         if (B_ierr) {
3414:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3415:         }

3417:         if (B_neigs > nmax) {
3418:           if (pcbddc->dbg_flag) PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax);
3419:           if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3420:           B_neigs = nmax;
3421:         }

3423:         nmin_s = PetscMin(nmin, B_N);
3424:         if (B_neigs < nmin_s) {
3425:           PetscBLASInt B_neigs2 = 0;

3427:           if (upart) {
3428:             if (scal) {
3429:               B_IU = nmin_s;
3430:               B_IL = B_neigs + 1;
3431:             } else {
3432:               B_IL = B_N - nmin_s + 1;
3433:               B_IU = B_N - B_neigs;
3434:             }
3435:           } else {
3436:             B_IL = B_neigs + 1;
3437:             B_IU = nmin_s;
3438:           }
3439:           if (pcbddc->dbg_flag) {
3440:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU);
3441:           }
3442:           if (sub_schurs->is_symmetric) {
3443:             PetscInt j, k;
3444:             for (j = 0; j < subset_size; j++) {
3445:               for (k = j; k < subset_size; k++) {
3446:                 S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3447:                 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3448:               }
3449:             }
3450:           } else {
3451:             PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size);
3452:             PetscArraycpy(St, Starray + cumarray, subset_size * subset_size);
3453:           }
3454:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3455: #if defined(PETSC_USE_COMPLEX)
3456:           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3457: #else
3458:           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3459: #endif
3460:           PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0);
3461:           PetscFPTrapPop();
3462:           B_neigs += B_neigs2;
3463:         }
3464:         if (B_ierr) {
3467:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3468:         }
3469:         if (pcbddc->dbg_flag) {
3470:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs);
3471:           for (j = 0; j < B_neigs; j++) {
3472:             if (!sub_schurs->gdsw) {
3473:               if (eigs[j] == 0.0) {
3474:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n");
3475:               } else {
3476:                 if (upart) {
3477:                   PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]);
3478:                 } else {
3479:                   PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1. / eigs[j + eigs_start]));
3480:                 }
3481:               }
3482:             } else {
3483:               double pg = (double)eigs[j + eigs_start];
3484:               if (pg < 2 * PETSC_SMALL) pg = 0.0;
3485:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg);
3486:             }
3487:           }
3488:         }
3489:       } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3490:     }
3491:     /* change the basis back to the original one */
3492:     if (sub_schurs->change) {
3493:       Mat change, phi, phit;

3495:       if (pcbddc->dbg_flag > 2) {
3496:         PetscInt ii;
3497:         for (ii = 0; ii < B_neigs; ii++) {
3498:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N);
3499:           for (j = 0; j < B_N; j++) {
3500: #if defined(PETSC_USE_COMPLEX)
3501:             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3502:             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3503:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c);
3504: #else
3505:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]));
3506: #endif
3507:           }
3508:         }
3509:       }
3510:       KSPGetOperators(sub_schurs->change[i], &change, NULL);
3511:       MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit);
3512:       MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi);
3513:       MatCopy(phi, phit, SAME_NONZERO_PATTERN);
3514:       MatDestroy(&phit);
3515:       MatDestroy(&phi);
3516:     }
3517:     maxneigs                               = PetscMax(B_neigs, maxneigs);
3518:     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3519:     if (B_neigs) {
3520:       PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size);

3522:       if (pcbddc->dbg_flag > 1) {
3523:         PetscInt ii;
3524:         for (ii = 0; ii < B_neigs; ii++) {
3525:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N);
3526:           for (j = 0; j < B_N; j++) {
3527: #if defined(PETSC_USE_COMPLEX)
3528:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3529:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3530:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c);
3531: #else
3532:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]));
3533: #endif
3534:           }
3535:         }
3536:       }
3537:       PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size);
3538:       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3539:       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3540:       cum++;
3541:     }
3542:     ISRestoreIndices(sub_schurs->is_subs[i], &idxs);
3543:     /* shift for next computation */
3544:     cumarray += subset_size * subset_size;
3545:   }
3546:   if (pcbddc->dbg_flag) PetscViewerFlush(pcbddc->dbg_viewer);

3548:   if (mss) {
3549:     if (sub_schurs->gdsw) {
3550:       MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray);
3551:       MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray);
3552:     } else {
3553:       MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray);
3554:       MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray);
3555:       /* destroy matrices (junk) */
3556:       MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3557:       MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3558:     }
3559:   }
3560:   if (allocated_S_St) PetscFree2(S, St);
3561:   PetscFree5(eigv, eigs, work, B_iwork, B_ifail);
3562: #if defined(PETSC_USE_COMPLEX)
3563:   PetscFree(rwork);
3564: #endif
3565:   if (pcbddc->dbg_flag) {
3566:     PetscInt maxneigs_r;
3567:     MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc));
3568:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r);
3569:   }
3570:   PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0);
3571:   return 0;
3572: }

/*
  PCBDDCSetUpSolvers - Runs the solver setup stages of the BDDC preconditioner, in order.

  Input Parameter:
. pc - the preconditioner; it is forwarded unchanged to every stage

  Notes:
  The stages are called in this order: PCBDDCSetUpLocalScatters(), PCBDDCSetUpLocalSolvers(),
  PCBDDCSetUpCorrection(), PCBDDCSetUpCoarseSolver(). The array handed back by
  PCBDDCSetUpCorrection() is passed to PCBDDCSetUpCoarseSolver() and then released here with
  PetscFree(), so it does not outlive this call.

  As written in this listing the function returns 0 unconditionally; the return values of the
  individual stages are not inspected.
*/
3574: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3575: {
3576:   PetscScalar *coarse_submat_vals; /* filled by PCBDDCSetUpCorrection(), released at the end of this function */

3578:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3579:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3580:   PCBDDCSetUpLocalScatters(pc);

3582:   /* Setup local neumann solver ksp_R */
3583:   /* PCBDDCSetUpLocalScatters should be called first! */
3584:   PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE); /* NOTE(review): the two flags presumably select (dirichlet, neumann) - confirm against the callee */

3586:   /*
3587:      Setup local correction and local part of coarse basis.
3588:      Gives back the dense local part of the coarse matrix in column major ordering
3589:   */
3590:   PCBDDCSetUpCorrection(pc, &coarse_submat_vals);

3592:   /* Compute total number of coarse nodes and setup coarse solver */
3593:   PCBDDCSetUpCoarseSolver(pc, coarse_submat_vals);

3595:   /* release the array handed back by PCBDDCSetUpCorrection() */
3596:   PetscFree(coarse_submat_vals);
3597:   return 0;
3598: }

/*
  PCBDDCResetCustomization - Releases the user-customization objects stored in the PC_BDDC data.

  Input Parameter:
. pc - the preconditioner; pc->data is read as a PC_BDDC

  Notes:
  Destroys the index sets for the user primal vertices and for the Neumann and Dirichlet
  boundaries (both the plain and the "local" variants), destroys the onearnullspace null space,
  frees onearnullvecs_state, and finally calls the two dofs-splitting setters with a count of 0
  and a NULL array.

  As written in this listing the function returns 0 unconditionally.
*/
3600: PetscErrorCode PCBDDCResetCustomization(PC pc)
3601: {
3602:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

3604:   ISDestroy(&pcbddc->user_primal_vertices);
3605:   ISDestroy(&pcbddc->user_primal_vertices_local);
3606:   ISDestroy(&pcbddc->NeumannBoundaries);
3607:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
3608:   ISDestroy(&pcbddc->DirichletBoundaries);
3609:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
3610:   PetscFree(pcbddc->onearnullvecs_state);
3611:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
3612:   PCBDDCSetDofsSplitting(pc, 0, NULL); /* presumably clears a previously set splitting - confirm against the setter */
3613:   PCBDDCSetDofsSplittingLocal(pc, 0, NULL); /* same call shape for the local variant */
3614:   return 0;
3615: }

/*
  PCBDDCResetTopography - Releases the topography-related objects stored in the PC_BDDC data
  and resets the associated state flags.

  Input Parameter:
. pc - the preconditioner; pc->data is read as a PC_BDDC

  Notes:
  Destroys the matrices, index sets and work vector listed below, the graph (mat_graph), every
  entry of local_subs followed by the array itself, and the sub_schurs object. Afterwards
  n_local_subs is 0, graphanalyzed and corner_selected are PETSC_FALSE and recompute_topography
  is PETSC_TRUE.

  As written in this listing the function returns 0 unconditionally.
*/
3617: PetscErrorCode PCBDDCResetTopography(PC pc)
3618: {
3619:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3620:   PetscInt i;

3622:   MatDestroy(&pcbddc->nedcG);
3623:   ISDestroy(&pcbddc->nedclocal);
3624:   MatDestroy(&pcbddc->discretegradient);
3625:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3626:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3627:   MatDestroy(&pcbddc->switch_static_change);
3628:   VecDestroy(&pcbddc->work_change);
3629:   MatDestroy(&pcbddc->ConstraintMatrix);
3630:   MatDestroy(&pcbddc->divudotp);
3631:   ISDestroy(&pcbddc->divudotp_vl2l);
3632:   PCBDDCGraphDestroy(&pcbddc->mat_graph);
3633:   for (i = 0; i < pcbddc->n_local_subs; i++) ISDestroy(&pcbddc->local_subs[i]); /* entries first, the holding array is freed below */
3634:   pcbddc->n_local_subs = 0;
3635:   PetscFree(pcbddc->local_subs);
3636:   PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3637:   pcbddc->graphanalyzed        = PETSC_FALSE;
3638:   pcbddc->recompute_topography = PETSC_TRUE; /* presumably makes a later setup rebuild the topography - confirm where the flag is read */
3639:   pcbddc->corner_selected      = PETSC_FALSE;
3640:   return 0;
3641: }

/*
  PCBDDCResetSolvers - Releases the solver-related objects stored in the PC_BDDC data.

  Input Parameter:
. pc - the preconditioner; pc->data is read as a PC_BDDC

  Notes:
  Destroys the coarse and local work vectors, the coarse basis matrices (coarse_phi_B/D,
  coarse_psi_B/D), the auxiliary matrices, the R index set, the scatters, the local matrix and
  the benign-related objects, and frees the primal index arrays. The three KSP objects
  (ksp_D, ksp_R, coarse_ksp) are reset with KSPReset() rather than destroyed.

  The dense storage of coarse_phi_B is freed explicitly before the matrix is destroyed, so the
  order of those two steps matters.

  As written in this listing the function returns 0 unconditionally.
*/
3643: PetscErrorCode PCBDDCResetSolvers(PC pc)
3644: {
3645:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

3647:   VecDestroy(&pcbddc->coarse_vec);
3648:   if (pcbddc->coarse_phi_B) {
3649:     PetscScalar *array;
3650:     MatDenseGetArray(pcbddc->coarse_phi_B, &array);
3651:     PetscFree(array); /* NOTE(review): freed directly, with no MatDenseRestoreArray() here - presumably the storage was allocated by BDDC code and attached to the matrix; confirm at the creation site */
3652:   }
3653:   MatDestroy(&pcbddc->coarse_phi_B);
3654:   MatDestroy(&pcbddc->coarse_phi_D);
3655:   MatDestroy(&pcbddc->coarse_psi_B);
3656:   MatDestroy(&pcbddc->coarse_psi_D);
3657:   VecDestroy(&pcbddc->vec1_P);
3658:   VecDestroy(&pcbddc->vec1_C);
3659:   MatDestroy(&pcbddc->local_auxmat2);
3660:   MatDestroy(&pcbddc->local_auxmat1);
3661:   VecDestroy(&pcbddc->vec1_R);
3662:   VecDestroy(&pcbddc->vec2_R);
3663:   ISDestroy(&pcbddc->is_R_local);
3664:   VecScatterDestroy(&pcbddc->R_to_B);
3665:   VecScatterDestroy(&pcbddc->R_to_D);
3666:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3667:   KSPReset(pcbddc->ksp_D); /* the KSP objects are reset, not destroyed, in this function */
3668:   KSPReset(pcbddc->ksp_R);
3669:   KSPReset(pcbddc->coarse_ksp);
3670:   MatDestroy(&pcbddc->local_mat);
3671:   PetscFree(pcbddc->primal_indices_local_idxs);
3672:   PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult);
3673:   PetscFree(pcbddc->global_primal_indices);
3674:   ISDestroy(&pcbddc->coarse_subassembling);
3675:   MatDestroy(&pcbddc->benign_change);
3676:   VecDestroy(&pcbddc->benign_vec);
3677:   PCBDDCBenignShellMat(pc, PETSC_TRUE); /* NOTE(review): PETSC_TRUE presumably asks the helper to undo its shell-matrix setup - confirm against PCBDDCBenignShellMat */
3678:   MatDestroy(&pcbddc->benign_B0);
3679:   PetscSFDestroy(&pcbddc->benign_sf);
3680:   if (pcbddc->benign_zerodiag_subs) {
3681:     PetscInt i;
3682:     for (i = 0; i < pcbddc->benign_n; i++) ISDestroy(&pcbddc->benign_zerodiag_subs[i]); /* benign_n entries, then the holding array */
3683:     PetscFree(pcbddc->benign_zerodiag_subs);
3684:   }
3685:   PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0);
3686:   return 0;
3687: }

3689: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3690: {
3691:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3692:   PC_IS   *pcis   = (PC_IS *)pc->data;
3693:   VecType  impVecType;
3694:   PetscInt n_constraints, n_R, old_size;

3696:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3697:   n_R           = pcis->n - pcbddc->n_vertices;
3698:   VecGetType(pcis->vec1_N, &impVecType);
3699:   /* local work vectors (try to avoid unneeded work)*/
3700:   /* R nodes */
3701:   old_size = -1;
3702:   if (pcbddc->vec1_R) VecGetSize(pcbddc->vec1_R, &old_size);
3703:   if (n_R != old_size) {
3704:     VecDestroy(&pcbddc->vec1_R);
3705:     VecDestroy(&pcbddc->vec2_R);
3706:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R);
3707:     VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R);
3708:     VecSetType(pcbddc->vec1_R, impVecType);
3709:     VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R);
3710:   }
3711:   /* local primal dofs */
3712:   old_size = -1;
3713:   if (pcbddc->vec1_P) VecGetSize(pcbddc->vec1_P, &old_size);
3714:   if (pcbddc->local_primal_size != old_size) {
3715:     VecDestroy(&pcbddc->vec1_P);
3716:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P);
3717:     VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size);
3718:     VecSetType(pcbddc->vec1_P, impVecType);
3719:   }
3720:   /* local explicit constraints */
3721:   old_size = -1;
3722:   if (pcbddc->vec1_C) VecGetSize(pcbddc->vec1_C, &old_size);
3723:   if (n_constraints && n_constraints != old_size) {
3724:     VecDestroy(&pcbddc->vec1_C);
3725:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C);
3726:     VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints);
3727:     VecSetType(pcbddc->vec1_C, impVecType);
3728:   }
3729:   return 0;
3730: }

3732: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3733: {
3734:   /* pointers to pcis and pcbddc */
3735:   PC_IS          *pcis       = (PC_IS *)pc->data;
3736:   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3737:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3738:   /* submatrices of local problem */
3739:   Mat A_RV, A_VR, A_VV, local_auxmat2_R;
3740:   /* submatrices of local coarse problem */
3741:   Mat S_VV, S_CV, S_VC, S_CC;
3742:   /* working matrices */
3743:   Mat C_CR;
3744:   /* additional working stuff */
3745:   PC           pc_R;
3746:   Mat          F, Brhs = NULL;
3747:   Vec          dummy_vec;
3748:   PetscBool    isLU, isCHOL, need_benign_correction, sparserhs;
3749:   PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3750:   PetscScalar *work;
3751:   PetscInt    *idx_V_B;
3752:   PetscInt     lda_rhs, n, n_vertices, n_constraints, *p0_lidx_I;
3753:   PetscInt     i, n_R, n_D, n_B;
3754:   PetscScalar  one = 1.0, m_one = -1.0;

3757:   PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0);

3759:   /* Set Non-overlapping dimensions */
3760:   n_vertices    = pcbddc->n_vertices;
3761:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3762:   n_B           = pcis->n_B;
3763:   n_D           = pcis->n - n_B;
3764:   n_R           = pcis->n - n_vertices;

3766:   /* vertices in boundary numbering */
3767:   PetscMalloc1(n_vertices, &idx_V_B);
3768:   ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B);

3771:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3772:   PetscCalloc1(pcbddc->local_primal_size * pcbddc->local_primal_size, &coarse_submat_vals);
3773:   MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_vertices, coarse_submat_vals, &S_VV);
3774:   MatDenseSetLDA(S_VV, pcbddc->local_primal_size);
3775:   MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_vertices, coarse_submat_vals + n_vertices, &S_CV);
3776:   MatDenseSetLDA(S_CV, pcbddc->local_primal_size);
3777:   MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_constraints, coarse_submat_vals + pcbddc->local_primal_size * n_vertices, &S_VC);
3778:   MatDenseSetLDA(S_VC, pcbddc->local_primal_size);
3779:   MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_constraints, coarse_submat_vals + (pcbddc->local_primal_size + 1) * n_vertices, &S_CC);
3780:   MatDenseSetLDA(S_CC, pcbddc->local_primal_size);

3782:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3783:   KSPGetPC(pcbddc->ksp_R, &pc_R);
3784:   PCSetUp(pc_R);
3785:   PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU);
3786:   PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL);
3787:   lda_rhs                = n_R;
3788:   need_benign_correction = PETSC_FALSE;
3789:   if (isLU || isCHOL) {
3790:     PCFactorGetMatrix(pc_R, &F);
3791:   } else if (sub_schurs && sub_schurs->reuse_solver) {
3792:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3793:     MatFactorType      type;

3795:     F = reuse_solver->F;
3796:     MatGetFactorType(F, &type);
3797:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3798:     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3799:     MatGetSize(F, &lda_rhs, NULL);
3800:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3801:   } else F = NULL;

3803:   /* determine if we can use a sparse right-hand side */
3804:   sparserhs = PETSC_FALSE;
3805:   if (F) {
3806:     MatSolverType solver;

3808:     MatFactorGetSolverType(F, &solver);
3809:     PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs);
3810:   }

3812:   /* allocate workspace */
3813:   n = 0;
3814:   if (n_constraints) n += lda_rhs * n_constraints;
3815:   if (n_vertices) {
3816:     n = PetscMax(2 * lda_rhs * n_vertices, n);
3817:     n = PetscMax((lda_rhs + n_B) * n_vertices, n);
3818:   }
3819:   if (!pcbddc->symmetric_primal) n = PetscMax(2 * lda_rhs * pcbddc->local_primal_size, n);
3820:   PetscMalloc1(n, &work);

3822:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3823:   dummy_vec = NULL;
3824:   if (need_benign_correction && lda_rhs != n_R && F) {
3825:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec);
3826:     VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE);
3827:     VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name);
3828:   }

3830:   MatDestroy(&pcbddc->local_auxmat1);
3831:   MatDestroy(&pcbddc->local_auxmat2);

3833:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
3834:   if (n_constraints) {
3835:     Mat M3, C_B;
3836:     IS  is_aux;

3838:     /* Extract constraints on R nodes: C_{CR}  */
3839:     ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_aux);
3840:     MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR);
3841:     MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B);

3843:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3844:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3845:     if (!sparserhs) {
3846:       PetscArrayzero(work, lda_rhs * n_constraints);
3847:       for (i = 0; i < n_constraints; i++) {
3848:         const PetscScalar *row_cmat_values;
3849:         const PetscInt    *row_cmat_indices;
3850:         PetscInt           size_of_constraint, j;

3852:         MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values);
3853:         for (j = 0; j < size_of_constraint; j++) work[row_cmat_indices[j] + i * lda_rhs] = -row_cmat_values[j];
3854:         MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values);
3855:       }
3856:       MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &Brhs);
3857:     } else {
3858:       Mat tC_CR;

3860:       MatScale(C_CR, -1.0);
3861:       if (lda_rhs != n_R) {
3862:         PetscScalar *aa;
3863:         PetscInt     r, *ii, *jj;
3864:         PetscBool    done;

3866:         MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done);
3868:         MatSeqAIJGetArray(C_CR, &aa);
3869:         MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR);
3870:         MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done);
3872:       } else {
3873:         PetscObjectReference((PetscObject)C_CR);
3874:         tC_CR = C_CR;
3875:       }
3876:       MatCreateTranspose(tC_CR, &Brhs);
3877:       MatDestroy(&tC_CR);
3878:     }
3879:     MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, NULL, &local_auxmat2_R);
3880:     if (F) {
3881:       if (need_benign_correction) {
3882:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3884:         /* rhs is already zero on interior dofs, no need to change the rhs */
3885:         PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n);
3886:       }
3887:       MatMatSolve(F, Brhs, local_auxmat2_R);
3888:       if (need_benign_correction) {
3889:         PetscScalar       *marr;
3890:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3892:         MatDenseGetArray(local_auxmat2_R, &marr);
3893:         if (lda_rhs != n_R) {
3894:           for (i = 0; i < n_constraints; i++) {
3895:             VecPlaceArray(dummy_vec, marr + i * lda_rhs);
3896:             PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE);
3897:             VecResetArray(dummy_vec);
3898:           }
3899:         } else {
3900:           for (i = 0; i < n_constraints; i++) {
3901:             VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs);
3902:             PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE);
3903:             VecResetArray(pcbddc->vec1_R);
3904:           }
3905:         }
3906:         MatDenseRestoreArray(local_auxmat2_R, &marr);
3907:       }
3908:     } else {
3909:       PetscScalar *marr;

3911:       MatDenseGetArray(local_auxmat2_R, &marr);
3912:       for (i = 0; i < n_constraints; i++) {
3913:         VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs);
3914:         VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs);
3915:         KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R);
3916:         KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R);
3917:         VecResetArray(pcbddc->vec1_R);
3918:         VecResetArray(pcbddc->vec2_R);
3919:       }
3920:       MatDenseRestoreArray(local_auxmat2_R, &marr);
3921:     }
3922:     if (sparserhs) MatScale(C_CR, -1.0);
3923:     MatDestroy(&Brhs);
3924:     if (!pcbddc->switch_static) {
3925:       MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_constraints, NULL, &pcbddc->local_auxmat2);
3926:       for (i = 0; i < n_constraints; i++) {
3927:         Vec r, b;
3928:         MatDenseGetColumnVecRead(local_auxmat2_R, i, &r);
3929:         MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b);
3930:         VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD);
3931:         VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD);
3932:         MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b);
3933:         MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r);
3934:       }
3935:       MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3);
3936:     } else {
3937:       if (lda_rhs != n_R) {
3938:         IS dummy;

3940:         ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &dummy);
3941:         MatCreateSubMatrix(local_auxmat2_R, dummy, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2);
3942:         ISDestroy(&dummy);
3943:       } else {
3944:         PetscObjectReference((PetscObject)local_auxmat2_R);
3945:         pcbddc->local_auxmat2 = local_auxmat2_R;
3946:       }
3947:       MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3);
3948:     }
3949:     ISDestroy(&is_aux);
3950:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
3951:     MatScale(M3, m_one);
3952:     if (isCHOL) {
3953:       MatCholeskyFactor(M3, NULL, NULL);
3954:     } else {
3955:       MatLUFactor(M3, NULL, NULL, NULL);
3956:     }
3957:     MatSeqDenseInvertFactors_Private(M3);
3958:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
3959:     MatMatMult(M3, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1);
3960:     MatDestroy(&C_B);
3961:     MatCopy(M3, S_CC, SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
3962:     MatDestroy(&M3);
3963:   }

3965:   /* Get submatrices from subdomain matrix */
3966:   if (n_vertices) {
3967: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
3968:     PetscBool oldpin;
3969: #endif
3970:     PetscBool isaij;
3971:     IS        is_aux;

3973:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
3974:       IS tis;

3976:       ISDuplicate(pcbddc->is_R_local, &tis);
3977:       ISSort(tis);
3978:       ISComplement(tis, 0, pcis->n, &is_aux);
3979:       ISDestroy(&tis);
3980:     } else {
3981:       ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux);
3982:     }
3983: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
3984:     oldpin = pcbddc->local_mat->boundtocpu;
3985: #endif
3986:     MatBindToCPU(pcbddc->local_mat, PETSC_TRUE);
3987:     MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV);
3988:     MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR);
3989:     PetscObjectBaseTypeCompare((PetscObject)A_VR, MATSEQAIJ, &isaij);
3990:     if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
3991:       MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR);
3992:     }
3993:     MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV);
3994: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
3995:     MatBindToCPU(pcbddc->local_mat, oldpin);
3996: #endif
3997:     ISDestroy(&is_aux);
3998:   }

4000:   /* Matrix of coarse basis functions (local) */
4001:   if (pcbddc->coarse_phi_B) {
4002:     PetscInt on_B, on_primal, on_D = n_D;
4003:     if (pcbddc->coarse_phi_D) MatGetSize(pcbddc->coarse_phi_D, &on_D, NULL);
4004:     MatGetSize(pcbddc->coarse_phi_B, &on_B, &on_primal);
4005:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4006:       PetscScalar *marray;

4008:       MatDenseGetArray(pcbddc->coarse_phi_B, &marray);
4009:       PetscFree(marray);
4010:       MatDestroy(&pcbddc->coarse_phi_B);
4011:       MatDestroy(&pcbddc->coarse_psi_B);
4012:       MatDestroy(&pcbddc->coarse_phi_D);
4013:       MatDestroy(&pcbddc->coarse_psi_D);
4014:     }
4015:   }

4017:   if (!pcbddc->coarse_phi_B) {
4018:     PetscScalar *marr;

4020:     /* memory size */
4021:     n = n_B * pcbddc->local_primal_size;
4022:     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D * pcbddc->local_primal_size;
4023:     if (!pcbddc->symmetric_primal) n *= 2;
4024:     PetscCalloc1(n, &marr);
4025:     MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_B);
4026:     marr += n_B * pcbddc->local_primal_size;
4027:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4028:       MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_D);
4029:       marr += n_D * pcbddc->local_primal_size;
4030:     }
4031:     if (!pcbddc->symmetric_primal) {
4032:       MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_B);
4033:       marr += n_B * pcbddc->local_primal_size;
4034:       if (pcbddc->switch_static || pcbddc->dbg_flag) MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_D);
4035:     } else {
4036:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4037:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4038:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4039:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4040:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4041:       }
4042:     }
4043:   }

4045:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4046:   p0_lidx_I = NULL;
4047:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4048:     const PetscInt *idxs;

4050:     ISGetIndices(pcis->is_I_local, &idxs);
4051:     PetscMalloc1(pcbddc->benign_n, &p0_lidx_I);
4052:     for (i = 0; i < pcbddc->benign_n; i++) PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]);
4053:     ISRestoreIndices(pcis->is_I_local, &idxs);
4054:   }

4056:   /* vertices */
4057:   if (n_vertices) {
4058:     PetscBool restoreavr = PETSC_FALSE;

4060:     MatConvert(A_VV, MATDENSE, MAT_INPLACE_MATRIX, &A_VV);

4062:     if (n_R) {
4063:       Mat                A_RRmA_RV, A_RV_bcorr = NULL, S_VVt; /* S_VVt with LDA=N */
4064:       PetscBLASInt       B_N, B_one            = 1;
4065:       const PetscScalar *x;
4066:       PetscScalar       *y;

4068:       MatScale(A_RV, m_one);
4069:       if (need_benign_correction) {
4070:         ISLocalToGlobalMapping RtoN;
4071:         IS                     is_p0;
4072:         PetscInt              *idxs_p0, n;

4074:         PetscMalloc1(pcbddc->benign_n, &idxs_p0);
4075:         ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN);
4076:         ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0);
4078:         ISLocalToGlobalMappingDestroy(&RtoN);
4079:         ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0);
4080:         MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr);
4081:         ISDestroy(&is_p0);
4082:       }

4084:       MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work, &A_RRmA_RV);
4085:       if (!sparserhs || need_benign_correction) {
4086:         if (lda_rhs == n_R) {
4087:           MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV);
4088:         } else {
4089:           PetscScalar    *av, *array;
4090:           const PetscInt *xadj, *adjncy;
4091:           PetscInt        n;
4092:           PetscBool       flg_row;

4094:           array = work + lda_rhs * n_vertices;
4095:           PetscArrayzero(array, lda_rhs * n_vertices);
4096:           MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV);
4097:           MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row);
4098:           MatSeqAIJGetArray(A_RV, &av);
4099:           for (i = 0; i < n; i++) {
4100:             PetscInt j;
4101:             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * adjncy[j] + i] = av[j];
4102:           }
4103:           MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row);
4104:           MatDestroy(&A_RV);
4105:           MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, array, &A_RV);
4106:         }
4107:         if (need_benign_correction) {
4108:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4109:           PetscScalar       *marr;

4111:           MatDenseGetArray(A_RV, &marr);
4112:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

4114:                  | 0 0  0 | (V)
4115:              L = | 0 0 -1 | (P-p0)
4116:                  | 0 0 -1 | (p0)

4118:           */
4119:           for (i = 0; i < reuse_solver->benign_n; i++) {
4120:             const PetscScalar *vals;
4121:             const PetscInt    *idxs, *idxs_zero;
4122:             PetscInt           n, j, nz;

4124:             ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz);
4125:             ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero);
4126:             MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals);
4127:             for (j = 0; j < n; j++) {
4128:               PetscScalar val = vals[j];
4129:               PetscInt    k, col = idxs[j];
4130:               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4131:             }
4132:             MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals);
4133:             ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero);
4134:           }
4135:           MatDenseRestoreArray(A_RV, &marr);
4136:         }
4137:         PetscObjectReference((PetscObject)A_RV);
4138:         Brhs = A_RV;
4139:       } else {
4140:         Mat tA_RVT, A_RVT;

4142:         if (!pcbddc->symmetric_primal) {
4143:           /* A_RV already scaled by -1 */
4144:           MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT);
4145:         } else {
4146:           restoreavr = PETSC_TRUE;
4147:           MatScale(A_VR, -1.0);
4148:           PetscObjectReference((PetscObject)A_VR);
4149:           A_RVT = A_VR;
4150:         }
4151:         if (lda_rhs != n_R) {
4152:           PetscScalar *aa;
4153:           PetscInt     r, *ii, *jj;
4154:           PetscBool    done;

4156:           MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done);
4158:           MatSeqAIJGetArray(A_RVT, &aa);
4159:           MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT);
4160:           MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done);
4162:         } else {
4163:           PetscObjectReference((PetscObject)A_RVT);
4164:           tA_RVT = A_RVT;
4165:         }
4166:         MatCreateTranspose(tA_RVT, &Brhs);
4167:         MatDestroy(&tA_RVT);
4168:         MatDestroy(&A_RVT);
4169:       }
4170:       if (F) {
4171:         /* need to correct the rhs */
4172:         if (need_benign_correction) {
4173:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4174:           PetscScalar       *marr;

4176:           MatDenseGetArray(Brhs, &marr);
4177:           if (lda_rhs != n_R) {
4178:             for (i = 0; i < n_vertices; i++) {
4179:               VecPlaceArray(dummy_vec, marr + i * lda_rhs);
4180:               PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE);
4181:               VecResetArray(dummy_vec);
4182:             }
4183:           } else {
4184:             for (i = 0; i < n_vertices; i++) {
4185:               VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs);
4186:               PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE);
4187:               VecResetArray(pcbddc->vec1_R);
4188:             }
4189:           }
4190:           MatDenseRestoreArray(Brhs, &marr);
4191:         }
4192:         MatMatSolve(F, Brhs, A_RRmA_RV);
4193:         if (restoreavr) MatScale(A_VR, -1.0);
4194:         /* need to correct the solution */
4195:         if (need_benign_correction) {
4196:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4197:           PetscScalar       *marr;

4199:           MatDenseGetArray(A_RRmA_RV, &marr);
4200:           if (lda_rhs != n_R) {
4201:             for (i = 0; i < n_vertices; i++) {
4202:               VecPlaceArray(dummy_vec, marr + i * lda_rhs);
4203:               PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE);
4204:               VecResetArray(dummy_vec);
4205:             }
4206:           } else {
4207:             for (i = 0; i < n_vertices; i++) {
4208:               VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs);
4209:               PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE);
4210:               VecResetArray(pcbddc->vec1_R);
4211:             }
4212:           }
4213:           MatDenseRestoreArray(A_RRmA_RV, &marr);
4214:         }
4215:       } else {
4216:         MatDenseGetArray(Brhs, &y);
4217:         for (i = 0; i < n_vertices; i++) {
4218:           VecPlaceArray(pcbddc->vec1_R, y + i * lda_rhs);
4219:           VecPlaceArray(pcbddc->vec2_R, work + i * lda_rhs);
4220:           KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R);
4221:           KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R);
4222:           VecResetArray(pcbddc->vec1_R);
4223:           VecResetArray(pcbddc->vec2_R);
4224:         }
4225:         MatDenseRestoreArray(Brhs, &y);
4226:       }
4227:       MatDestroy(&A_RV);
4228:       MatDestroy(&Brhs);
4229:       /* S_VV and S_CV */
4230:       if (n_constraints) {
4231:         Mat B;

4233:         PetscArrayzero(work + lda_rhs * n_vertices, n_B * n_vertices);
4234:         for (i = 0; i < n_vertices; i++) {
4235:           VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs);
4236:           VecPlaceArray(pcis->vec1_B, work + lda_rhs * n_vertices + i * n_B);
4237:           VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
4238:           VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
4239:           VecResetArray(pcis->vec1_B);
4240:           VecResetArray(pcbddc->vec1_R);
4241:         }
4242:         MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_vertices, work + lda_rhs * n_vertices, &B);
4243:         /* Reuse dense S_C = pcbddc->local_auxmat1 * B */
4244:         MatProductCreateWithMat(pcbddc->local_auxmat1, B, NULL, S_CV);
4245:         MatProductSetType(S_CV, MATPRODUCT_AB);
4246:         MatProductSetFromOptions(S_CV);
4247:         MatProductSymbolic(S_CV);
4248:         MatProductNumeric(S_CV);
4249:         MatProductClear(S_CV);

4251:         MatDestroy(&B);
4252:         MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work + lda_rhs * n_vertices, &B);
4253:         /* Reuse B = local_auxmat2_R * S_CV */
4254:         MatProductCreateWithMat(local_auxmat2_R, S_CV, NULL, B);
4255:         MatProductSetType(B, MATPRODUCT_AB);
4256:         MatProductSetFromOptions(B);
4257:         MatProductSymbolic(B);
4258:         MatProductNumeric(B);

4260:         MatScale(S_CV, m_one);
4261:         PetscBLASIntCast(lda_rhs * n_vertices, &B_N);
4262:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, work + lda_rhs * n_vertices, &B_one, work, &B_one));
4263:         MatDestroy(&B);
4264:       }
4265:       if (lda_rhs != n_R) {
4266:         MatDestroy(&A_RRmA_RV);
4267:         MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, work, &A_RRmA_RV);
4268:         MatDenseSetLDA(A_RRmA_RV, lda_rhs);
4269:       }
4270:       MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VVt);
4271:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4272:       if (need_benign_correction) {
4273:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4274:         PetscScalar       *marr, *sums;

4276:         PetscMalloc1(n_vertices, &sums);
4277:         MatDenseGetArray(S_VVt, &marr);
4278:         for (i = 0; i < reuse_solver->benign_n; i++) {
4279:           const PetscScalar *vals;
4280:           const PetscInt    *idxs, *idxs_zero;
4281:           PetscInt           n, j, nz;

4283:           ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz);
4284:           ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero);
4285:           for (j = 0; j < n_vertices; j++) {
4286:             PetscInt k;
4287:             sums[j] = 0.;
4288:             for (k = 0; k < nz; k++) sums[j] += work[idxs_zero[k] + j * lda_rhs];
4289:           }
4290:           MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals);
4291:           for (j = 0; j < n; j++) {
4292:             PetscScalar val = vals[j];
4293:             PetscInt    k;
4294:             for (k = 0; k < n_vertices; k++) marr[idxs[j] + k * n_vertices] += val * sums[k];
4295:           }
4296:           MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals);
4297:           ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero);
4298:         }
4299:         PetscFree(sums);
4300:         MatDenseRestoreArray(S_VVt, &marr);
4301:         MatDestroy(&A_RV_bcorr);
4302:       }
4303:       MatDestroy(&A_RRmA_RV);
4304:       PetscBLASIntCast(n_vertices * n_vertices, &B_N);
4305:       MatDenseGetArrayRead(A_VV, &x);
4306:       MatDenseGetArray(S_VVt, &y);
4307:       PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, x, &B_one, y, &B_one));
4308:       MatDenseRestoreArrayRead(A_VV, &x);
4309:       MatDenseRestoreArray(S_VVt, &y);
4310:       MatCopy(S_VVt, S_VV, SAME_NONZERO_PATTERN);
4311:       MatDestroy(&S_VVt);
4312:     } else {
4313:       MatCopy(A_VV, S_VV, SAME_NONZERO_PATTERN);
4314:     }
4315:     MatDestroy(&A_VV);

4317:     /* coarse basis functions */
4318:     for (i = 0; i < n_vertices; i++) {
4319:       Vec         v;
4320:       PetscScalar one = 1.0, zero = 0.0;

4322:       VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i);
4323:       MatDenseGetColumnVec(pcbddc->coarse_phi_B, i, &v);
4324:       VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4325:       VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4326:       if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4327:         PetscMPIInt rank;
4328:         MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), &rank);
4330:       }
4331:       VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES);
4332:       VecAssemblyBegin(v); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */
4333:       VecAssemblyEnd(v);
4334:       MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i, &v);

4336:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4337:         PetscInt j;

4339:         MatDenseGetColumnVec(pcbddc->coarse_phi_D, i, &v);
4340:         VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4341:         VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4342:         if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4343:           PetscMPIInt rank;
4344:           MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), &rank);
4346:         }
4347:         for (j = 0; j < pcbddc->benign_n; j++) VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES);
4348:         VecAssemblyBegin(v);
4349:         VecAssemblyEnd(v);
4350:         MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i, &v);
4351:       }
4352:       VecResetArray(pcbddc->vec1_R);
4353:     }
4354:     /* if n_R == 0 the object is not destroyed */
4355:     MatDestroy(&A_RV);
4356:   }
4357:   VecDestroy(&dummy_vec);

4359:   if (n_constraints) {
4360:     Mat B;

4362:     MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &B);
4363:     MatScale(S_CC, m_one);
4364:     MatProductCreateWithMat(local_auxmat2_R, S_CC, NULL, B);
4365:     MatProductSetType(B, MATPRODUCT_AB);
4366:     MatProductSetFromOptions(B);
4367:     MatProductSymbolic(B);
4368:     MatProductNumeric(B);

4370:     MatScale(S_CC, m_one);
4371:     if (n_vertices) {
4372:       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4373:         MatTransposeSetPrecursor(S_CV, S_VC);
4374:         MatTranspose(S_CV, MAT_REUSE_MATRIX, &S_VC);
4375:       } else {
4376:         Mat S_VCt;

4378:         if (lda_rhs != n_R) {
4379:           MatDestroy(&B);
4380:           MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_constraints, work, &B);
4381:           MatDenseSetLDA(B, lda_rhs);
4382:         }
4383:         MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VCt);
4384:         MatCopy(S_VCt, S_VC, SAME_NONZERO_PATTERN);
4385:         MatDestroy(&S_VCt);
4386:       }
4387:     }
4388:     MatDestroy(&B);
4389:     /* coarse basis functions */
4390:     for (i = 0; i < n_constraints; i++) {
4391:       Vec v;

4393:       VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i);
4394:       MatDenseGetColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v);
4395:       VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4396:       VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4397:       MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v);
4398:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4399:         PetscInt    j;
4400:         PetscScalar zero = 0.0;
4401:         MatDenseGetColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v);
4402:         VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4403:         VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4404:         for (j = 0; j < pcbddc->benign_n; j++) VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES);
4405:         VecAssemblyBegin(v);
4406:         VecAssemblyEnd(v);
4407:         MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v);
4408:       }
4409:       VecResetArray(pcbddc->vec1_R);
4410:     }
4411:   }
4412:   if (n_constraints) MatDestroy(&local_auxmat2_R);
4413:   PetscFree(p0_lidx_I);

4415:   /* coarse matrix entries relative to B_0 */
4416:   if (pcbddc->benign_n) {
4417:     Mat                B0_B, B0_BPHI;
4418:     IS                 is_dummy;
4419:     const PetscScalar *data;
4420:     PetscInt           j;

4422:     ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy);
4423:     MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B);
4424:     ISDestroy(&is_dummy);
4425:     MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI);
4426:     MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI);
4427:     MatDenseGetArrayRead(B0_BPHI, &data);
4428:     for (j = 0; j < pcbddc->benign_n; j++) {
4429:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4430:       for (i = 0; i < pcbddc->local_primal_size; i++) {
4431:         coarse_submat_vals[primal_idx * pcbddc->local_primal_size + i] = data[i * pcbddc->benign_n + j];
4432:         coarse_submat_vals[i * pcbddc->local_primal_size + primal_idx] = data[i * pcbddc->benign_n + j];
4433:       }
4434:     }
4435:     MatDenseRestoreArrayRead(B0_BPHI, &data);
4436:     MatDestroy(&B0_B);
4437:     MatDestroy(&B0_BPHI);
4438:   }

4440:   /* compute other basis functions for non-symmetric problems */
4441:   if (!pcbddc->symmetric_primal) {
4442:     Mat          B_V = NULL, B_C = NULL;
4443:     PetscScalar *marray;

4445:     if (n_constraints) {
4446:       Mat S_CCT, C_CRT;

4448:       MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT);
4449:       MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT);
4450:       MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C);
4451:       MatDestroy(&S_CCT);
4452:       if (n_vertices) {
4453:         Mat S_VCT;

4455:         MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT);
4456:         MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V);
4457:         MatDestroy(&S_VCT);
4458:       }
4459:       MatDestroy(&C_CRT);
4460:     } else {
4461:       MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V);
4462:     }
4463:     if (n_vertices && n_R) {
4464:       PetscScalar    *av, *marray;
4465:       const PetscInt *xadj, *adjncy;
4466:       PetscInt        n;
4467:       PetscBool       flg_row;

4469:       /* B_V = B_V - A_VR^T */
4470:       MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR);
4471:       MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row);
4472:       MatSeqAIJGetArray(A_VR, &av);
4473:       MatDenseGetArray(B_V, &marray);
4474:       for (i = 0; i < n; i++) {
4475:         PetscInt j;
4476:         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
4477:       }
4478:       MatDenseRestoreArray(B_V, &marray);
4479:       MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row);
4480:       MatDestroy(&A_VR);
4481:     }

4483:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4484:     if (n_vertices) {
4485:       MatDenseGetArray(B_V, &marray);
4486:       for (i = 0; i < n_vertices; i++) {
4487:         VecPlaceArray(pcbddc->vec1_R, marray + i * n_R);
4488:         VecPlaceArray(pcbddc->vec2_R, work + i * n_R);
4489:         KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R);
4490:         KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R);
4491:         VecResetArray(pcbddc->vec1_R);
4492:         VecResetArray(pcbddc->vec2_R);
4493:       }
4494:       MatDenseRestoreArray(B_V, &marray);
4495:     }
4496:     if (B_C) {
4497:       MatDenseGetArray(B_C, &marray);
4498:       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
4499:         VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R);
4500:         VecPlaceArray(pcbddc->vec2_R, work + i * n_R);
4501:         KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R);
4502:         KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R);
4503:         VecResetArray(pcbddc->vec1_R);
4504:         VecResetArray(pcbddc->vec2_R);
4505:       }
4506:       MatDenseRestoreArray(B_C, &marray);
4507:     }
4508:     /* coarse basis functions */
4509:     for (i = 0; i < pcbddc->local_primal_size; i++) {
4510:       Vec v;

4512:       VecPlaceArray(pcbddc->vec1_R, work + i * n_R);
4513:       MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v);
4514:       VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4515:       VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4516:       if (i < n_vertices) {
4517:         PetscScalar one = 1.0;
4518:         VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES);
4519:         VecAssemblyBegin(v);
4520:         VecAssemblyEnd(v);
4521:       }
4522:       MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v);

4524:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4525:         MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v);
4526:         VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4527:         VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD);
4528:         MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v);
4529:       }
4530:       VecResetArray(pcbddc->vec1_R);
4531:     }
4532:     MatDestroy(&B_V);
4533:     MatDestroy(&B_C);
4534:   }

4536:   /* free memory */
4537:   PetscFree(idx_V_B);
4538:   MatDestroy(&S_VV);
4539:   MatDestroy(&S_CV);
4540:   MatDestroy(&S_VC);
4541:   MatDestroy(&S_CC);
4542:   PetscFree(work);
4543:   if (n_vertices) MatDestroy(&A_VR);
4544:   if (n_constraints) MatDestroy(&C_CR);
4545:   PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0);

4547:   /* Checking coarse_sub_mat and coarse basis functions */
4548:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4549:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4550:   if (pcbddc->dbg_flag) {
4551:     Mat       coarse_sub_mat;
4552:     Mat       AUXMAT, TM1, TM2, TM3, TM4;
4553:     Mat       coarse_phi_D, coarse_phi_B;
4554:     Mat       coarse_psi_D, coarse_psi_B;
4555:     Mat       A_II, A_BB, A_IB, A_BI;
4556:     Mat       C_B, CPHI;
4557:     IS        is_dummy;
4558:     Vec       mones;
4559:     MatType   checkmattype = MATSEQAIJ;
4560:     PetscReal real_value;

4562:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4563:       Mat A;
4564:       PCBDDCBenignProject(pc, NULL, NULL, &A);
4565:       MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II);
4566:       MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB);
4567:       MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI);
4568:       MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB);
4569:       MatDestroy(&A);
4570:     } else {
4571:       MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II);
4572:       MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB);
4573:       MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI);
4574:       MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB);
4575:     }
4576:     MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D);
4577:     MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B);
4578:     if (!pcbddc->symmetric_primal) {
4579:       MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D);
4580:       MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B);
4581:     }
4582:     MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_sub_mat);

4584:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
4585:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal);
4586:     PetscViewerFlush(pcbddc->dbg_viewer);
4587:     if (!pcbddc->symmetric_primal) {
4588:       MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4589:       MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1);
4590:       MatDestroy(&AUXMAT);
4591:       MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4592:       MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2);
4593:       MatDestroy(&AUXMAT);
4594:       MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4595:       MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3);
4596:       MatDestroy(&AUXMAT);
4597:       MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4598:       MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4);
4599:       MatDestroy(&AUXMAT);
4600:     } else {
4601:       MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1);
4602:       MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2);
4603:       MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4604:       MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3);
4605:       MatDestroy(&AUXMAT);
4606:       MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT);
4607:       MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4);
4608:       MatDestroy(&AUXMAT);
4609:     }
4610:     MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN);
4611:     MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN);
4612:     MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN);
4613:     MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1);
4614:     if (pcbddc->benign_n) {
4615:       Mat                B0_B, B0_BPHI;
4616:       const PetscScalar *data2;
4617:       PetscScalar       *data;
4618:       PetscInt           j;

4620:       ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy);
4621:       MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B);
4622:       MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI);
4623:       MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI);
4624:       MatDenseGetArray(TM1, &data);
4625:       MatDenseGetArrayRead(B0_BPHI, &data2);
4626:       for (j = 0; j < pcbddc->benign_n; j++) {
4627:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4628:         for (i = 0; i < pcbddc->local_primal_size; i++) {
4629:           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
4630:           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
4631:         }
4632:       }
4633:       MatDenseRestoreArray(TM1, &data);
4634:       MatDenseRestoreArrayRead(B0_BPHI, &data2);
4635:       MatDestroy(&B0_B);
4636:       ISDestroy(&is_dummy);
4637:       MatDestroy(&B0_BPHI);
4638:     }
4639: #if 0
4640:   {
4641:     PetscViewer viewer;
4642:     char filename[256];
4643:     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4644:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4645:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4646:     PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4647:     MatView(coarse_sub_mat,viewer);
4648:     PetscObjectSetName((PetscObject)TM1,"projected");
4649:     MatView(TM1,viewer);
4650:     if (pcbddc->coarse_phi_B) {
4651:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4652:       MatView(pcbddc->coarse_phi_B,viewer);
4653:     }
4654:     if (pcbddc->coarse_phi_D) {
4655:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4656:       MatView(pcbddc->coarse_phi_D,viewer);
4657:     }
4658:     if (pcbddc->coarse_psi_B) {
4659:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4660:       MatView(pcbddc->coarse_psi_B,viewer);
4661:     }
4662:     if (pcbddc->coarse_psi_D) {
4663:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4664:       MatView(pcbddc->coarse_psi_D,viewer);
4665:     }
4666:     PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4667:     MatView(pcbddc->local_mat,viewer);
4668:     PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4669:     MatView(pcbddc->ConstraintMatrix,viewer);
4670:     PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4671:     ISView(pcis->is_I_local,viewer);
4672:     PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4673:     ISView(pcis->is_B_local,viewer);
4674:     PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4675:     ISView(pcbddc->is_R_local,viewer);
4676:     PetscViewerDestroy(&viewer);
4677:   }
4678: #endif
4679:     MatAXPY(TM1, m_one, coarse_sub_mat, DIFFERENT_NONZERO_PATTERN);
4680:     MatNorm(TM1, NORM_FROBENIUS, &real_value);
4681:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4682:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value);

4684:     /* check constraints */
4685:     ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy);
4686:     MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B);
4687:     if (!pcbddc->benign_n) { /* TODO: add benign case */
4688:       MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI);
4689:     } else {
4690:       PetscScalar *data;
4691:       Mat          tmat;
4692:       MatDenseGetArray(pcbddc->coarse_phi_B, &data);
4693:       MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat);
4694:       MatDenseRestoreArray(pcbddc->coarse_phi_B, &data);
4695:       MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI);
4696:       MatDestroy(&tmat);
4697:     }
4698:     MatCreateVecs(CPHI, &mones, NULL);
4699:     VecSet(mones, -1.0);
4700:     MatDiagonalSet(CPHI, mones, ADD_VALUES);
4701:     MatNorm(CPHI, NORM_FROBENIUS, &real_value);
4702:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value);
4703:     if (!pcbddc->symmetric_primal) {
4704:       MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI);
4705:       VecSet(mones, -1.0);
4706:       MatDiagonalSet(CPHI, mones, ADD_VALUES);
4707:       MatNorm(CPHI, NORM_FROBENIUS, &real_value);
4708:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value);
4709:     }
4710:     MatDestroy(&C_B);
4711:     MatDestroy(&CPHI);
4712:     ISDestroy(&is_dummy);
4713:     VecDestroy(&mones);
4714:     PetscViewerFlush(pcbddc->dbg_viewer);
4715:     MatDestroy(&A_II);
4716:     MatDestroy(&A_BB);
4717:     MatDestroy(&A_IB);
4718:     MatDestroy(&A_BI);
4719:     MatDestroy(&TM1);
4720:     MatDestroy(&TM2);
4721:     MatDestroy(&TM3);
4722:     MatDestroy(&TM4);
4723:     MatDestroy(&coarse_phi_D);
4724:     MatDestroy(&coarse_phi_B);
4725:     if (!pcbddc->symmetric_primal) {
4726:       MatDestroy(&coarse_psi_D);
4727:       MatDestroy(&coarse_psi_B);
4728:     }
4729:     MatDestroy(&coarse_sub_mat);
4730:   }
4731:   /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */
4732:   {
4733:     PetscBool gpu;

4735:     PetscObjectTypeCompare((PetscObject)pcis->vec1_N, VECSEQCUDA, &gpu);
4736:     if (gpu) {
4737:       if (pcbddc->local_auxmat1) MatConvert(pcbddc->local_auxmat1, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1);
4738:       if (pcbddc->local_auxmat2) MatConvert(pcbddc->local_auxmat2, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2);
4739:       if (pcbddc->coarse_phi_B) MatConvert(pcbddc->coarse_phi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B);
4740:       if (pcbddc->coarse_phi_D) MatConvert(pcbddc->coarse_phi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D);
4741:       if (pcbddc->coarse_psi_B) MatConvert(pcbddc->coarse_psi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B);
4742:       if (pcbddc->coarse_psi_D) MatConvert(pcbddc->coarse_psi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D);
4743:     }
4744:   }
4745:   /* get back data */
4746:   *coarse_submat_vals_n = coarse_submat_vals;
4747:   return 0;
4748: }

4750: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
4751: {
4752:   Mat      *work_mat;
4753:   IS        isrow_s, iscol_s;
4754:   PetscBool rsorted, csorted;
4755:   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;

4757:   ISSorted(isrow, &rsorted);
4758:   ISSorted(iscol, &csorted);
4759:   ISGetLocalSize(isrow, &rsize);
4760:   ISGetLocalSize(iscol, &csize);

4762:   if (!rsorted) {
4763:     const PetscInt *idxs;
4764:     PetscInt       *idxs_sorted, i;

4766:     PetscMalloc1(rsize, &idxs_perm_r);
4767:     PetscMalloc1(rsize, &idxs_sorted);
4768:     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
4769:     ISGetIndices(isrow, &idxs);
4770:     PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r);
4771:     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
4772:     ISRestoreIndices(isrow, &idxs);
4773:     ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s);
4774:   } else {
4775:     PetscObjectReference((PetscObject)isrow);
4776:     isrow_s = isrow;
4777:   }

4779:   if (!csorted) {
4780:     if (isrow == iscol) {
4781:       PetscObjectReference((PetscObject)isrow_s);
4782:       iscol_s = isrow_s;
4783:     } else {
4784:       const PetscInt *idxs;
4785:       PetscInt       *idxs_sorted, i;

4787:       PetscMalloc1(csize, &idxs_perm_c);
4788:       PetscMalloc1(csize, &idxs_sorted);
4789:       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
4790:       ISGetIndices(iscol, &idxs);
4791:       PetscSortIntWithPermutation(csize, idxs, idxs_perm_c);
4792:       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
4793:       ISRestoreIndices(iscol, &idxs);
4794:       ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s);
4795:     }
4796:   } else {
4797:     PetscObjectReference((PetscObject)iscol);
4798:     iscol_s = iscol;
4799:   }

4801:   MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat);

4803:   if (!rsorted || !csorted) {
4804:     Mat new_mat;
4805:     IS  is_perm_r, is_perm_c;

4807:     if (!rsorted) {
4808:       PetscInt *idxs_r, i;
4809:       PetscMalloc1(rsize, &idxs_r);
4810:       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
4811:       PetscFree(idxs_perm_r);
4812:       ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r);
4813:     } else {
4814:       ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r);
4815:     }
4816:     ISSetPermutation(is_perm_r);

4818:     if (!csorted) {
4819:       if (isrow_s == iscol_s) {
4820:         PetscObjectReference((PetscObject)is_perm_r);
4821:         is_perm_c = is_perm_r;
4822:       } else {
4823:         PetscInt *idxs_c, i;
4825:         PetscMalloc1(csize, &idxs_c);
4826:         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
4827:         PetscFree(idxs_perm_c);
4828:         ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c);
4829:       }
4830:     } else {
4831:       ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c);
4832:     }
4833:     ISSetPermutation(is_perm_c);

4835:     MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat);
4836:     MatDestroy(&work_mat[0]);
4837:     work_mat[0] = new_mat;
4838:     ISDestroy(&is_perm_r);
4839:     ISDestroy(&is_perm_c);
4840:   }

4842:   PetscObjectReference((PetscObject)work_mat[0]);
4843:   *B = work_mat[0];
4844:   MatDestroyMatrices(1, &work_mat);
4845:   ISDestroy(&isrow_s);
4846:   ISDestroy(&iscol_s);
4847:   return 0;
4848: }

4850: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4851: {
4852:   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
4853:   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
4854:   Mat       new_mat, lA;
4855:   IS        is_local, is_global;
4856:   PetscInt  local_size;
4857:   PetscBool isseqaij, issym, isset;

4859:   MatDestroy(&pcbddc->local_mat);
4860:   MatGetSize(matis->A, &local_size, NULL);
4861:   ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local);
4862:   ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global);
4863:   ISDestroy(&is_local);
4864:   MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat);
4865:   ISDestroy(&is_global);

4867:   if (pcbddc->dbg_flag) {
4868:     Vec       x, x_change;
4869:     PetscReal error;

4871:     MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change);
4872:     VecSetRandom(x, NULL);
4873:     MatMult(ChangeOfBasisMatrix, x, x_change);
4874:     VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD);
4875:     VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD);
4876:     MatMult(new_mat, matis->x, matis->y);
4877:     if (!pcbddc->change_interior) {
4878:       const PetscScalar *x, *y, *v;
4879:       PetscReal          lerror = 0.;
4880:       PetscInt           i;

4882:       VecGetArrayRead(matis->x, &x);
4883:       VecGetArrayRead(matis->y, &y);
4884:       VecGetArrayRead(matis->counter, &v);
4885:       for (i = 0; i < local_size; i++)
4886:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
4887:       VecRestoreArrayRead(matis->x, &x);
4888:       VecRestoreArrayRead(matis->y, &y);
4889:       VecRestoreArrayRead(matis->counter, &v);
4890:       MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc));
4891:       if (error > PETSC_SMALL) {
4892:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4893:           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
4894:         } else {
4895:           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
4896:         }
4897:       }
4898:     }
4899:     VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE);
4900:     VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE);
4901:     VecAXPY(x, -1.0, x_change);
4902:     VecNorm(x, NORM_INFINITY, &error);
4903:     if (error > PETSC_SMALL) {
4904:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4905:         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
4906:       } else {
4907:         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
4908:       }
4909:     }
4910:     VecDestroy(&x);
4911:     VecDestroy(&x_change);
4912:   }

4914:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4915:   PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA);

4917:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4918:   PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij);
4919:   if (isseqaij) {
4920:     MatDestroy(&pcbddc->local_mat);
4921:     MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat);
4922:     if (lA) {
4923:       Mat work;
4924:       MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work);
4925:       PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work);
4926:       MatDestroy(&work);
4927:     }
4928:   } else {
4929:     Mat work_mat;

4931:     MatDestroy(&pcbddc->local_mat);
4932:     MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat);
4933:     MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat);
4934:     MatDestroy(&work_mat);
4935:     if (lA) {
4936:       Mat work;
4937:       MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat);
4938:       MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work);
4939:       PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work);
4940:       MatDestroy(&work);
4941:     }
4942:   }
4943:   MatIsSymmetricKnown(matis->A, &isset, &issym);
4944:   if (isset) MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym);
4945:   MatDestroy(&new_mat);
4946:   return 0;
4947: }

4949: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
4950: {
4951:   PC_IS          *pcis        = (PC_IS *)(pc->data);
4952:   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
4953:   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
4954:   PetscInt       *idx_R_local = NULL;
4955:   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
4956:   PetscInt        vbs, bs;
4957:   PetscBT         bitmask = NULL;

4959:   /*
4960:     No need to setup local scatters if
4961:       - primal space is unchanged
4962:         AND
4963:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
4964:         AND
4965:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
4966:   */
4967:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) return 0;
4968:   /* destroy old objects */
4969:   ISDestroy(&pcbddc->is_R_local);
4970:   VecScatterDestroy(&pcbddc->R_to_B);
4971:   VecScatterDestroy(&pcbddc->R_to_D);
4972:   /* Set Non-overlapping dimensions */
4973:   n_B        = pcis->n_B;
4974:   n_D        = pcis->n - n_B;
4975:   n_vertices = pcbddc->n_vertices;

4977:   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */

4979:   /* create auxiliary bitmask and allocate workspace */
4980:   if (!sub_schurs || !sub_schurs->reuse_solver) {
4981:     PetscMalloc1(pcis->n - n_vertices, &idx_R_local);
4982:     PetscBTCreate(pcis->n, &bitmask);
4983:     for (i = 0; i < n_vertices; i++) PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]);

4985:     for (i = 0, n_R = 0; i < pcis->n; i++) {
4986:       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
4987:     }
4988:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
4989:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4991:     ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local);
4992:     ISGetLocalSize(reuse_solver->is_R, &n_R);
4993:   }

4995:   /* Block code */
4996:   vbs = 1;
4997:   MatGetBlockSize(pcbddc->local_mat, &bs);
4998:   if (bs > 1 && !(n_vertices % bs)) {
4999:     PetscBool is_blocked = PETSC_TRUE;
5000:     PetscInt *vary;
5001:     if (!sub_schurs || !sub_schurs->reuse_solver) {
5002:       PetscMalloc1(pcis->n / bs, &vary);
5003:       PetscArrayzero(vary, pcis->n / bs);
5004:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5005:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5006:       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5007:       for (i = 0; i < pcis->n / bs; i++) {
5008:         if (vary[i] != 0 && vary[i] != bs) {
5009:           is_blocked = PETSC_FALSE;
5010:           break;
5011:         }
5012:       }
5013:       PetscFree(vary);
5014:     } else {
5015:       /* Verify directly the R set */
5016:       for (i = 0; i < n_R / bs; i++) {
5017:         PetscInt j, node = idx_R_local[bs * i];
5018:         for (j = 1; j < bs; j++) {
5019:           if (node != idx_R_local[bs * i + j] - j) {
5020:             is_blocked = PETSC_FALSE;
5021:             break;
5022:           }
5023:         }
5024:       }
5025:     }
5026:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5027:       vbs = bs;
5028:       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5029:     }
5030:   }
5031:   ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local);
5032:   if (sub_schurs && sub_schurs->reuse_solver) {
5033:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5035:     ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local);
5036:     ISDestroy(&reuse_solver->is_R);
5037:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
5038:     reuse_solver->is_R = pcbddc->is_R_local;
5039:   } else {
5040:     PetscFree(idx_R_local);
5041:   }

5043:   /* print some info if requested */
5044:   if (pcbddc->dbg_flag) {
5045:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
5046:     PetscViewerFlush(pcbddc->dbg_viewer);
5047:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5048:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank);
5049:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B);
5050:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5051:                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5052:     PetscViewerFlush(pcbddc->dbg_viewer);
5053:   }

5055:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5056:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5057:     IS        is_aux1, is_aux2;
5058:     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;

5060:     ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local);
5061:     PetscMalloc1(pcis->n_B - n_vertices, &aux_array1);
5062:     PetscMalloc1(pcis->n_B - n_vertices, &aux_array2);
5063:     ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices);
5064:     for (i = 0; i < n_D; i++) PetscBTSet(bitmask, is_indices[i]);
5065:     ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices);
5066:     for (i = 0, j = 0; i < n_R; i++) {
5067:       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5068:     }
5069:     ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1);
5070:     ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices);
5071:     for (i = 0, j = 0; i < n_B; i++) {
5072:       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5073:     }
5074:     ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices);
5075:     ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2);
5076:     VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B);
5077:     ISDestroy(&is_aux1);
5078:     ISDestroy(&is_aux2);

5080:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5081:       PetscMalloc1(n_D, &aux_array1);
5082:       for (i = 0, j = 0; i < n_R; i++) {
5083:         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5084:       }
5085:       ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1);
5086:       VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D);
5087:       ISDestroy(&is_aux1);
5088:     }
5089:     PetscBTDestroy(&bitmask);
5090:     ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local);
5091:   } else {
5092:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5093:     IS                 tis;
5094:     PetscInt           schur_size;

5096:     ISGetLocalSize(reuse_solver->is_B, &schur_size);
5097:     ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis);
5098:     VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B);
5099:     ISDestroy(&tis);
5100:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5101:       ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis);
5102:       VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D);
5103:       ISDestroy(&tis);
5104:     }
5105:   }
5106:   return 0;
5107: }

5109: static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5110: {
5111:   MatNullSpace   NullSpace;
5112:   Mat            dmat;
5113:   const Vec     *nullvecs;
5114:   Vec            v, v2, *nullvecs2;
5115:   VecScatter     sct = NULL;
5116:   PetscContainer c;
5117:   PetscScalar   *ddata;
5118:   PetscInt       k, nnsp_size, bsiz, bsiz2, n, N, bs;
5119:   PetscBool      nnsp_has_cnst;

5121:   if (!is && !B) { /* MATIS */
5122:     Mat_IS *matis = (Mat_IS *)A->data;

5124:     if (!B) MatISGetLocalMat(A, &B);
5125:     sct = matis->cctx;
5126:     PetscObjectReference((PetscObject)sct);
5127:   } else {
5128:     MatGetNullSpace(B, &NullSpace);
5129:     if (!NullSpace) MatGetNearNullSpace(B, &NullSpace);
5130:     if (NullSpace) return 0;
5131:   }
5132:   MatGetNullSpace(A, &NullSpace);
5133:   if (!NullSpace) MatGetNearNullSpace(A, &NullSpace);
5134:   if (!NullSpace) return 0;

5136:   MatCreateVecs(A, &v, NULL);
5137:   MatCreateVecs(B, &v2, NULL);
5138:   if (!sct) VecScatterCreate(v, is, v2, NULL, &sct);
5139:   MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs);
5140:   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5141:   PetscMalloc1(bsiz, &nullvecs2);
5142:   VecGetBlockSize(v2, &bs);
5143:   VecGetSize(v2, &N);
5144:   VecGetLocalSize(v2, &n);
5145:   PetscMalloc1(n * bsiz, &ddata);
5146:   for (k = 0; k < nnsp_size; k++) {
5147:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]);
5148:     VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD);
5149:     VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD);
5150:   }
5151:   if (nnsp_has_cnst) {
5152:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]);
5153:     VecSet(nullvecs2[nnsp_size], 1.0);
5154:   }
5155:   PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2);
5156:   MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace);

5158:   MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat);
5159:   PetscContainerCreate(PetscObjectComm((PetscObject)B), &c);
5160:   PetscContainerSetPointer(c, ddata);
5161:   PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault);
5162:   PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c);
5163:   PetscContainerDestroy(&c);
5164:   PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat);
5165:   MatDestroy(&dmat);

5167:   for (k = 0; k < bsiz; k++) VecDestroy(&nullvecs2[k]);
5168:   PetscFree(nullvecs2);
5169:   MatSetNearNullSpace(B, NullSpace);
5170:   MatNullSpaceDestroy(&NullSpace);
5171:   VecDestroy(&v);
5172:   VecDestroy(&v2);
5173:   VecScatterDestroy(&sct);
5174:   return 0;
5175: }

5177: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5178: {
5179:   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5180:   PC_IS       *pcis   = (PC_IS *)pc->data;
5181:   PC           pc_temp;
5182:   Mat          A_RR;
5183:   MatNullSpace nnsp;
5184:   MatReuse     reuse;
5185:   PetscScalar  m_one = -1.0;
5186:   PetscReal    value;
5187:   PetscInt     n_D, n_R;
5188:   PetscBool    issbaij, opts, isset, issym;
5189:   void (*f)(void) = NULL;
5190:   char   dir_prefix[256], neu_prefix[256], str_level[16];
5191:   size_t len;

5193:   PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0);
5194:   /* approximate solver, propagate NearNullSpace if needed */
5195:   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5196:     MatNullSpace gnnsp1, gnnsp2;
5197:     PetscBool    lhas, ghas;

5199:     MatGetNearNullSpace(pcbddc->local_mat, &nnsp);
5200:     MatGetNearNullSpace(pc->pmat, &gnnsp1);
5201:     MatGetNullSpace(pc->pmat, &gnnsp2);
5202:     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5203:     MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
5204:     if (!ghas && (gnnsp1 || gnnsp2)) MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL);
5205:   }

5207:   /* compute prefixes */
5208:   PetscStrcpy(dir_prefix, "");
5209:   PetscStrcpy(neu_prefix, "");
5210:   if (!pcbddc->current_level) {
5211:     PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix));
5212:     PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix));
5213:     PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix));
5214:     PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix));
5215:   } else {
5216:     PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level));
5217:     PetscStrlen(((PetscObject)pc)->prefix, &len);
5218:     len -= 15;                                /* remove "pc_bddc_coarse_" */
5219:     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
5220:     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5221:     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5222:     PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1);
5223:     PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1);
5224:     PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix));
5225:     PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix));
5226:     PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix));
5227:     PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix));
5228:   }

5230:   /* DIRICHLET PROBLEM */
5231:   if (dirichlet) {
5232:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5233:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5235:       if (pcbddc->dbg_flag) {
5236:         Mat A_IIn;

5238:         PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn);
5239:         MatDestroy(&pcis->A_II);
5240:         pcis->A_II = A_IIn;
5241:       }
5242:     }
5243:     MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym);
5244:     if (isset) MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym);

5246:     /* Matrix for Dirichlet problem is pcis->A_II */
5247:     n_D  = pcis->n - pcis->n_B;
5248:     opts = PETSC_FALSE;
5249:     if (!pcbddc->ksp_D) { /* create object if not yet build */
5250:       opts = PETSC_TRUE;
5251:       KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D);
5252:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1);
5253:       /* default */
5254:       KSPSetType(pcbddc->ksp_D, KSPPREONLY);
5255:       KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix);
5256:       PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij);
5257:       KSPGetPC(pcbddc->ksp_D, &pc_temp);
5258:       if (issbaij) {
5259:         PCSetType(pc_temp, PCCHOLESKY);
5260:       } else {
5261:         PCSetType(pc_temp, PCLU);
5262:       }
5263:       KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure);
5264:     }
5265:     MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix);
5266:     KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II);
5267:     /* Allow user's customization */
5268:     if (opts) KSPSetFromOptions(pcbddc->ksp_D);
5269:     MatGetNearNullSpace(pcis->pA_II, &nnsp);
5270:     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5271:       MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II);
5272:     }
5273:     MatGetNearNullSpace(pcis->pA_II, &nnsp);
5274:     KSPGetPC(pcbddc->ksp_D, &pc_temp);
5275:     PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f);
5276:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5277:       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5278:       const PetscInt *idxs;
5279:       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;

5281:       ISGetLocalSize(pcis->is_I_local, &nl);
5282:       ISGetIndices(pcis->is_I_local, &idxs);
5283:       PetscMalloc1(nl * cdim, &scoords);
5284:       for (i = 0; i < nl; i++) {
5285:         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5286:       }
5287:       ISRestoreIndices(pcis->is_I_local, &idxs);
5288:       PCSetCoordinates(pc_temp, cdim, nl, scoords);
5289:       PetscFree(scoords);
5290:     }
5291:     if (sub_schurs && sub_schurs->reuse_solver) {
5292:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5294:       KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver);
5295:     }

5297:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5298:     if (!n_D) {
5299:       KSPGetPC(pcbddc->ksp_D, &pc_temp);
5300:       PCSetType(pc_temp, PCNONE);
5301:     }
5302:     KSPSetUp(pcbddc->ksp_D);
5303:     /* set ksp_D into pcis data */
5304:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
5305:     KSPDestroy(&pcis->ksp_D);
5306:     pcis->ksp_D = pcbddc->ksp_D;
5307:   }

5309:   /* NEUMANN PROBLEM */
5310:   A_RR = NULL;
5311:   if (neumann) {
5312:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5313:     PetscInt        ibs, mbs;
5314:     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
5315:     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;

5317:     reuse_neumann_solver = PETSC_FALSE;
5318:     if (sub_schurs && sub_schurs->reuse_solver) {
5319:       IS iP;

5321:       reuse_neumann_solver = PETSC_TRUE;
5322:       PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP);
5323:       if (iP) reuse_neumann_solver = PETSC_FALSE;
5324:     }
5325:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5326:     ISGetSize(pcbddc->is_R_local, &n_R);
5327:     if (pcbddc->ksp_R) { /* already created ksp */
5328:       PetscInt nn_R;
5329:       KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR);
5330:       PetscObjectReference((PetscObject)A_RR);
5331:       MatGetSize(A_RR, &nn_R, NULL);
5332:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5333:         KSPReset(pcbddc->ksp_R);
5334:         MatDestroy(&A_RR);
5335:         reuse = MAT_INITIAL_MATRIX;
5336:       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5337:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5338:           MatDestroy(&A_RR);
5339:           reuse = MAT_INITIAL_MATRIX;
5340:         } else { /* safe to reuse the matrix */
5341:           reuse = MAT_REUSE_MATRIX;
5342:         }
5343:       }
5344:       /* last check */
5345:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5346:         MatDestroy(&A_RR);
5347:         reuse = MAT_INITIAL_MATRIX;
5348:       }
5349:     } else { /* first time, so we need to create the matrix */
5350:       reuse = MAT_INITIAL_MATRIX;
5351:     }
5352:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5353:        TODO: Get Rid of these conversions */
5354:     MatGetBlockSize(pcbddc->local_mat, &mbs);
5355:     ISGetBlockSize(pcbddc->is_R_local, &ibs);
5356:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij);
5357:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5358:       if (matis->A == pcbddc->local_mat) {
5359:         MatDestroy(&pcbddc->local_mat);
5360:         MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat);
5361:       } else {
5362:         MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat);
5363:       }
5364:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5365:       if (matis->A == pcbddc->local_mat) {
5366:         MatDestroy(&pcbddc->local_mat);
5367:         MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat);
5368:       } else {
5369:         MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat);
5370:       }
5371:     }
5372:     /* extract A_RR */
5373:     if (reuse_neumann_solver) {
5374:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5376:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5377:         MatDestroy(&A_RR);
5378:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5379:           PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR);
5380:         } else {
5381:           MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR);
5382:         }
5383:       } else {
5384:         MatDestroy(&A_RR);
5385:         PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL);
5386:         PetscObjectReference((PetscObject)A_RR);
5387:       }
5388:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5389:       MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR);
5390:     }
5391:     MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym);
5392:     if (isset) MatSetOption(A_RR, MAT_SYMMETRIC, issym);
5393:     opts = PETSC_FALSE;
5394:     if (!pcbddc->ksp_R) { /* create object if not present */
5395:       opts = PETSC_TRUE;
5396:       KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R);
5397:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1);
5398:       /* default */
5399:       KSPSetType(pcbddc->ksp_R, KSPPREONLY);
5400:       KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix);
5401:       KSPGetPC(pcbddc->ksp_R, &pc_temp);
5402:       PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij);
5403:       if (issbaij) {
5404:         PCSetType(pc_temp, PCCHOLESKY);
5405:       } else {
5406:         PCSetType(pc_temp, PCLU);
5407:       }
5408:       KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure);
5409:     }
5410:     KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR);
5411:     MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix);
5412:     if (opts) { /* Allow user's customization once */
5413:       KSPSetFromOptions(pcbddc->ksp_R);
5414:     }
5415:     MatGetNearNullSpace(A_RR, &nnsp);
5416:     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5417:       MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR);
5418:     }
5419:     MatGetNearNullSpace(A_RR, &nnsp);
5420:     KSPGetPC(pcbddc->ksp_R, &pc_temp);
5421:     PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f);
5422:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5423:       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5424:       const PetscInt *idxs;
5425:       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;

5427:       ISGetLocalSize(pcbddc->is_R_local, &nl);
5428:       ISGetIndices(pcbddc->is_R_local, &idxs);
5429:       PetscMalloc1(nl * cdim, &scoords);
5430:       for (i = 0; i < nl; i++) {
5431:         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5432:       }
5433:       ISRestoreIndices(pcbddc->is_R_local, &idxs);
5434:       PCSetCoordinates(pc_temp, cdim, nl, scoords);
5435:       PetscFree(scoords);
5436:     }

5438:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5439:     if (!n_R) {
5440:       KSPGetPC(pcbddc->ksp_R, &pc_temp);
5441:       PCSetType(pc_temp, PCNONE);
5442:     }
5443:     /* Reuse solver if it is present */
5444:     if (reuse_neumann_solver) {
5445:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5447:       KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver);
5448:     }
5449:     KSPSetUp(pcbddc->ksp_R);
5450:   }

5452:   if (pcbddc->dbg_flag) {
5453:     PetscViewerFlush(pcbddc->dbg_viewer);
5454:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5455:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
5456:   }
5457:   PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0);

5459:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5460:   if (pcbddc->NullSpace_corr[0]) PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE);
5461:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]);
5462:   if (neumann && pcbddc->NullSpace_corr[2]) PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]);
5463:   /* check Dirichlet and Neumann solvers */
5464:   if (pcbddc->dbg_flag) {
5465:     if (dirichlet) { /* Dirichlet */
5466:       VecSetRandom(pcis->vec1_D, NULL);
5467:       MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D);
5468:       KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D);
5469:       KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D);
5470:       VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D);
5471:       VecNorm(pcis->vec1_D, NORM_INFINITY, &value);
5472:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_D))->prefix, (double)value);
5473:       PetscViewerFlush(pcbddc->dbg_viewer);
5474:     }
5475:     if (neumann) { /* Neumann */
5476:       VecSetRandom(pcbddc->vec1_R, NULL);
5477:       MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R);
5478:       KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R);
5479:       KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R);
5480:       VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R);
5481:       VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value);
5482:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_R))->prefix, (double)value);
5483:       PetscViewerFlush(pcbddc->dbg_viewer);
5484:     }
5485:   }
5486:   /* free Neumann problem's matrix */
5487:   MatDestroy(&A_RR);
5488:   return 0;
5489: }

5491: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5492: {
5493:   PC_BDDC        *pcbddc       = (PC_BDDC *)(pc->data);
5494:   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
5495:   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;

5497:   if (!reuse_solver) VecSet(pcbddc->vec1_R, 0.);
5498:   if (!pcbddc->switch_static) {
5499:     if (applytranspose && pcbddc->local_auxmat1) {
5500:       MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C);
5501:       MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B);
5502:     }
5503:     if (!reuse_solver) {
5504:       VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5505:       VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5506:     } else {
5507:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5509:       VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD);
5510:       VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD);
5511:     }
5512:   } else {
5513:     VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5514:     VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5515:     VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5516:     VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5517:     if (applytranspose && pcbddc->local_auxmat1) {
5518:       MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C);
5519:       MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B);
5520:       VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5521:       VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE);
5522:     }
5523:   }
5524:   PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0);
5525:   if (!reuse_solver || pcbddc->switch_static) {
5526:     if (applytranspose) {
5527:       KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R);
5528:     } else {
5529:       KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R);
5530:     }
5531:     KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R);
5532:   } else {
5533:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5535:     if (applytranspose) {
5536:       MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B);
5537:     } else {
5538:       MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B);
5539:     }
5540:   }
5541:   PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0);
5542:   VecSet(inout_B, 0.);
5543:   if (!pcbddc->switch_static) {
5544:     if (!reuse_solver) {
5545:       VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5546:       VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5547:     } else {
5548:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5550:       VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE);
5551:       VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE);
5552:     }
5553:     if (!applytranspose && pcbddc->local_auxmat1) {
5554:       MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C);
5555:       MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B);
5556:     }
5557:   } else {
5558:     VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5559:     VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5560:     VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD);
5561:     VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD);
5562:     if (!applytranspose && pcbddc->local_auxmat1) {
5563:       MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C);
5564:       MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R);
5565:     }
5566:     VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5567:     VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD);
5568:     VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD);
5569:     VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD);
5570:   }
5571:   return 0;
5572: }

5574: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5575: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5576: {
5577:   PC_BDDC          *pcbddc = (PC_BDDC *)(pc->data);
5578:   PC_IS            *pcis   = (PC_IS *)(pc->data);
5579:   const PetscScalar zero   = 0.0;

5581:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5582:   if (!pcbddc->benign_apply_coarse_only) {
5583:     if (applytranspose) {
5584:       MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P);
5585:       if (pcbddc->switch_static) MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P);
5586:     } else {
5587:       MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P);
5588:       if (pcbddc->switch_static) MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P);
5589:     }
5590:   } else {
5591:     VecSet(pcbddc->vec1_P, zero);
5592:   }

5594:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5595:   if (pcbddc->benign_n) {
5596:     PetscScalar *array;
5597:     PetscInt     j;

5599:     VecGetArray(pcbddc->vec1_P, &array);
5600:     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
5601:     VecRestoreArray(pcbddc->vec1_P, &array);
5602:   }

5604:   /* start communications from local primal nodes to rhs of coarse solver */
5605:   VecSet(pcbddc->coarse_vec, zero);
5606:   PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD);
5607:   PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD);

5609:   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5610:   if (pcbddc->coarse_ksp) {
5611:     Mat          coarse_mat;
5612:     Vec          rhs, sol;
5613:     MatNullSpace nullsp;
5614:     PetscBool    isbddc = PETSC_FALSE;

5616:     if (pcbddc->benign_have_null) {
5617:       PC coarse_pc;

5619:       KSPGetPC(pcbddc->coarse_ksp, &coarse_pc);
5620:       PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc);
5621:       /* we need to propagate to coarser levels the need for a possible benign correction */
5622:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5623:         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)(coarse_pc->data);
5624:         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
5625:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5626:       }
5627:     }
5628:     KSPGetRhs(pcbddc->coarse_ksp, &rhs);
5629:     KSPGetSolution(pcbddc->coarse_ksp, &sol);
5630:     KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL);
5631:     if (applytranspose) {
5633:       PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0);
5634:       KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol);
5635:       PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0);
5636:       KSPCheckSolve(pcbddc->coarse_ksp, pc, sol);
5637:       MatGetTransposeNullSpace(coarse_mat, &nullsp);
5638:       if (nullsp) MatNullSpaceRemove(nullsp, sol);
5639:     } else {
5640:       MatGetNullSpace(coarse_mat, &nullsp);
5641:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5642:         PC coarse_pc;

5644:         if (nullsp) MatNullSpaceRemove(nullsp, rhs);
5645:         KSPGetPC(pcbddc->coarse_ksp, &coarse_pc);
5646:         PCPreSolve(coarse_pc, pcbddc->coarse_ksp);
5647:         PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol);
5648:         PCPostSolve(coarse_pc, pcbddc->coarse_ksp);
5649:       } else {
5650:         PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0);
5651:         KSPSolve(pcbddc->coarse_ksp, rhs, sol);
5652:         PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0);
5653:         KSPCheckSolve(pcbddc->coarse_ksp, pc, sol);
5654:         if (nullsp) MatNullSpaceRemove(nullsp, sol);
5655:       }
5656:     }
5657:     /* we don't need the benign correction at coarser levels anymore */
5658:     if (pcbddc->benign_have_null && isbddc) {
5659:       PC       coarse_pc;
5660:       PC_BDDC *coarsepcbddc;

5662:       KSPGetPC(pcbddc->coarse_ksp, &coarse_pc);
5663:       coarsepcbddc                           = (PC_BDDC *)(coarse_pc->data);
5664:       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
5665:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5666:     }
5667:   }

5669:   /* Local solution on R nodes */
5670:   if (pcis->n && !pcbddc->benign_apply_coarse_only) PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose);
5671:   /* communications from coarse sol to local primal nodes */
5672:   PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE);
5673:   PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE);

5675:   /* Sum contributions from the two levels */
5676:   if (!pcbddc->benign_apply_coarse_only) {
5677:     if (applytranspose) {
5678:       MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B);
5679:       if (pcbddc->switch_static) MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D);
5680:     } else {
5681:       MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B);
5682:       if (pcbddc->switch_static) MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D);
5683:     }
5684:     /* store p0 */
5685:     if (pcbddc->benign_n) {
5686:       PetscScalar *array;
5687:       PetscInt     j;

5689:       VecGetArray(pcbddc->vec1_P, &array);
5690:       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
5691:       VecRestoreArray(pcbddc->vec1_P, &array);
5692:     }
5693:   } else { /* expand the coarse solution */
5694:     if (applytranspose) {
5695:       MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B);
5696:     } else {
5697:       MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B);
5698:     }
5699:   }
5700:   return 0;
5701: }

5703: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
5704: {
5705:   PC_BDDC           *pcbddc = (PC_BDDC *)(pc->data);
5706:   Vec                from, to;
5707:   const PetscScalar *array;

5709:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5710:     from = pcbddc->coarse_vec;
5711:     to   = pcbddc->vec1_P;
5712:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5713:       Vec tvec;

5715:       KSPGetRhs(pcbddc->coarse_ksp, &tvec);
5716:       VecResetArray(tvec);
5717:       KSPGetSolution(pcbddc->coarse_ksp, &tvec);
5718:       VecGetArrayRead(tvec, &array);
5719:       VecPlaceArray(from, array);
5720:       VecRestoreArrayRead(tvec, &array);
5721:     }
5722:   } else { /* from local to global -> put data in coarse right hand side */
5723:     from = pcbddc->vec1_P;
5724:     to   = pcbddc->coarse_vec;
5725:   }
5726:   VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode);
5727:   return 0;
5728: }

5730: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5731: {
5732:   PC_BDDC           *pcbddc = (PC_BDDC *)(pc->data);
5733:   Vec                from, to;
5734:   const PetscScalar *array;

5736:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5737:     from = pcbddc->coarse_vec;
5738:     to   = pcbddc->vec1_P;
5739:   } else { /* from local to global -> put data in coarse right hand side */
5740:     from = pcbddc->vec1_P;
5741:     to   = pcbddc->coarse_vec;
5742:   }
5743:   VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode);
5744:   if (smode == SCATTER_FORWARD) {
5745:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5746:       Vec tvec;

5748:       KSPGetRhs(pcbddc->coarse_ksp, &tvec);
5749:       VecGetArrayRead(to, &array);
5750:       VecPlaceArray(tvec, array);
5751:       VecRestoreArrayRead(to, &array);
5752:     }
5753:   } else {
5754:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5755:       VecResetArray(from);
5756:     }
5757:   }
5758:   return 0;
5759: }

5761: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5762: {
5763:   PC_IS   *pcis   = (PC_IS *)(pc->data);
5764:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5765:   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
5766:   /* one and zero */
5767:   PetscScalar one = 1.0, zero = 0.0;
5768:   /* space to store constraints and their local indices */
5769:   PetscScalar *constraints_data;
5770:   PetscInt    *constraints_idxs, *constraints_idxs_B;
5771:   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
5772:   PetscInt    *constraints_n;
5773:   /* iterators */
5774:   PetscInt i, j, k, total_counts, total_counts_cc, cum;
5775:   /* BLAS integers */
5776:   PetscBLASInt lwork, lierr;
5777:   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
5778:   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
5779:   /* reuse */
5780:   PetscInt  olocal_primal_size, olocal_primal_size_cc;
5781:   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
5782:   /* change of basis */
5783:   PetscBool qr_needed;
5784:   PetscBT   change_basis, qr_needed_idx;
5785:   /* auxiliary stuff */
5786:   PetscInt *nnz, *is_indices;
5787:   PetscInt  ncc;
5788:   /* some quantities */
5789:   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
5790:   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
5791:   PetscReal tol; /* tolerance for retaining eigenmodes */

5793:   tol = PetscSqrtReal(PETSC_SMALL);
5794:   /* Destroy Mat objects computed previously */
5795:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5796:   MatDestroy(&pcbddc->ConstraintMatrix);
5797:   MatDestroy(&pcbddc->switch_static_change);
5798:   /* save info on constraints from previous setup (if any) */
5799:   olocal_primal_size    = pcbddc->local_primal_size;
5800:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5801:   PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult);
5802:   PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc);
5803:   PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc);
5804:   PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult);
5805:   PetscFree(pcbddc->primal_indices_local_idxs);

5807:   if (!pcbddc->adaptive_selection) {
5808:     IS           ISForVertices, *ISForFaces, *ISForEdges;
5809:     MatNullSpace nearnullsp;
5810:     const Vec   *nearnullvecs;
5811:     Vec         *localnearnullsp;
5812:     PetscScalar *array;
5813:     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
5814:     PetscBool    nnsp_has_cnst;
5815:     /* LAPACK working arrays for SVD or POD */
5816:     PetscBool    skip_lapack, boolforchange;
5817:     PetscScalar *work;
5818:     PetscReal   *singular_vals;
5819: #if defined(PETSC_USE_COMPLEX)
5820:     PetscReal *rwork;
5821: #endif
5822:     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
5823:     PetscBLASInt dummy_int    = 1;
5824:     PetscScalar  dummy_scalar = 1.;
5825:     PetscBool    use_pod      = PETSC_FALSE;

5827:     /* MKL SVD with same input gives different results on different processes! */
5828: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
5829:     use_pod = PETSC_TRUE;
5830: #endif
5831:     /* Get index sets for faces, edges and vertices from graph */
5832:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices);
5833:     o_nf       = n_ISForFaces;
5834:     o_ne       = n_ISForEdges;
5835:     n_vertices = 0;
5836:     if (ISForVertices) ISGetSize(ISForVertices, &n_vertices);
5837:     /* print some info */
5838:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5839:       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
5840:       PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer);
5841:       PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5842:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n");
5843:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices);
5844:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges);
5845:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces);
5846:       PetscViewerFlush(pcbddc->dbg_viewer);
5847:       PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
5848:     }

5850:     if (!pcbddc->use_vertices) n_vertices = 0;
5851:     if (!pcbddc->use_edges) n_ISForEdges = 0;
5852:     if (!pcbddc->use_faces) n_ISForFaces = 0;

5854:     /* check if near null space is attached to global mat */
5855:     if (pcbddc->use_nnsp) {
5856:       MatGetNearNullSpace(pc->pmat, &nearnullsp);
5857:     } else nearnullsp = NULL;

5859:     if (nearnullsp) {
5860:       MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs);
5861:       /* remove any stored info */
5862:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
5863:       PetscFree(pcbddc->onearnullvecs_state);
5864:       /* store information for BDDC solver reuse */
5865:       PetscObjectReference((PetscObject)nearnullsp);
5866:       pcbddc->onearnullspace = nearnullsp;
5867:       PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state);
5868:       for (i = 0; i < nnsp_size; i++) PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]);
5869:     } else { /* if near null space is not provided BDDC uses constants by default */
5870:       nnsp_size     = 0;
5871:       nnsp_has_cnst = PETSC_TRUE;
5872:     }
5873:     /* get max number of constraints on a single cc */
5874:     max_constraints = nnsp_size;
5875:     if (nnsp_has_cnst) max_constraints++;

5877:     /*
5878:          Evaluate maximum storage size needed by the procedure
5879:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5880:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5881:          There can be multiple constraints per connected component
5882:                                                                                                                                                            */
5883:     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
5884:     PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n);

5886:     total_counts = n_ISForFaces + n_ISForEdges;
5887:     total_counts *= max_constraints;
5888:     total_counts += n_vertices;
5889:     PetscBTCreate(total_counts, &change_basis);

5891:     total_counts           = 0;
5892:     max_size_of_constraint = 0;
5893:     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
5894:       IS used_is;
5895:       if (i < n_ISForEdges) {
5896:         used_is = ISForEdges[i];
5897:       } else {
5898:         used_is = ISForFaces[i - n_ISForEdges];
5899:       }
5900:       ISGetSize(used_is, &j);
5901:       total_counts += j;
5902:       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
5903:     }
5904:     PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B);

5906:     /* get local part of global near null space vectors */
5907:     PetscMalloc1(nnsp_size, &localnearnullsp);
5908:     for (k = 0; k < nnsp_size; k++) {
5909:       VecDuplicate(pcis->vec1_N, &localnearnullsp[k]);
5910:       VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD);
5911:       VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD);
5912:     }

5914:     /* whether or not to skip lapack calls */
5915:     skip_lapack = PETSC_TRUE;
5916:     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

5918:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5919:     if (!skip_lapack) {
5920:       PetscScalar temp_work;

5922:       if (use_pod) {
5923:         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5924:         PetscMalloc1(max_constraints * max_constraints, &correlation_mat);
5925:         PetscMalloc1(max_constraints, &singular_vals);
5926:         PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis);
5927: #if defined(PETSC_USE_COMPLEX)
5928:         PetscMalloc1(3 * max_constraints, &rwork);
5929: #endif
5930:         /* now we evaluate the optimal workspace using query with lwork=-1 */
5931:         PetscBLASIntCast(max_constraints, &Blas_N);
5932:         PetscBLASIntCast(max_constraints, &Blas_LDA);
5933:         lwork = -1;
5934:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5935: #if !defined(PETSC_USE_COMPLEX)
5936:         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
5937: #else
5938:         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
5939: #endif
5940:         PetscFPTrapPop();
5942:       } else {
5943: #if !defined(PETSC_MISSING_LAPACK_GESVD)
5944:         /* SVD */
5945:         PetscInt max_n, min_n;
5946:         max_n = max_size_of_constraint;
5947:         min_n = max_constraints;
5948:         if (max_size_of_constraint < max_constraints) {
5949:           min_n = max_size_of_constraint;
5950:           max_n = max_constraints;
5951:         }
5952:         PetscMalloc1(min_n, &singular_vals);
5953:   #if defined(PETSC_USE_COMPLEX)
5954:         PetscMalloc1(5 * min_n, &rwork);
5955:   #endif
5956:         /* now we evaluate the optimal workspace using query with lwork=-1 */
5957:         lwork = -1;
5958:         PetscBLASIntCast(max_n, &Blas_M);
5959:         PetscBLASIntCast(min_n, &Blas_N);
5960:         PetscBLASIntCast(max_n, &Blas_LDA);
5961:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5962:   #if !defined(PETSC_USE_COMPLEX)
5963:         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
5964:   #else
5965:         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
5966:   #endif
5967:         PetscFPTrapPop();
5969: #else
5970:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
5971: #endif /* on missing GESVD */
5972:       }
5973:       /* Allocate optimal workspace */
5974:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork);
5975:       PetscMalloc1(lwork, &work);
5976:     }
5977:     /* Now we can loop on constraining sets */
5978:     total_counts            = 0;
5979:     constraints_idxs_ptr[0] = 0;
5980:     constraints_data_ptr[0] = 0;
5981:     /* vertices */
5982:     if (n_vertices) {
5983:       ISGetIndices(ISForVertices, (const PetscInt **)&is_indices);
5984:       PetscArraycpy(constraints_idxs, is_indices, n_vertices);
5985:       for (i = 0; i < n_vertices; i++) {
5986:         constraints_n[total_counts]            = 1;
5987:         constraints_data[total_counts]         = 1.0;
5988:         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
5989:         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
5990:         total_counts++;
5991:       }
5992:       ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices);
5993:     }

5995:     /* edges and faces */
5996:     total_counts_cc = total_counts;
5997:     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
5998:       IS        used_is;
5999:       PetscBool idxs_copied = PETSC_FALSE;

6001:       if (ncc < n_ISForEdges) {
6002:         used_is       = ISForEdges[ncc];
6003:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6004:       } else {
6005:         used_is       = ISForFaces[ncc - n_ISForEdges];
6006:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6007:       }
6008:       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */

6010:       ISGetSize(used_is, &size_of_constraint);
6011:       if (!size_of_constraint) continue;
6012:       ISGetIndices(used_is, (const PetscInt **)&is_indices);
6013:       /* change of basis should not be performed on local periodic nodes */
6014:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6015:       if (nnsp_has_cnst) {
6016:         PetscScalar quad_value;

6018:         PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint);
6019:         idxs_copied = PETSC_TRUE;

6021:         if (!pcbddc->use_nnsp_true) {
6022:           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6023:         } else {
6024:           quad_value = 1.0;
6025:         }
6026:         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6027:         temp_constraints++;
6028:         total_counts++;
6029:       }
6030:       for (k = 0; k < nnsp_size; k++) {
6031:         PetscReal    real_value;
6032:         PetscScalar *ptr_to_data;

6034:         VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array);
6035:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6036:         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6037:         VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array);
6038:         /* check if array is null on the connected component */
6039:         PetscBLASIntCast(size_of_constraint, &Blas_N);
6040:         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6041:         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6042:           temp_constraints++;
6043:           total_counts++;
6044:           if (!idxs_copied) {
6045:             PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint);
6046:             idxs_copied = PETSC_TRUE;
6047:           }
6048:         }
6049:       }
6050:       ISRestoreIndices(used_is, (const PetscInt **)&is_indices);
6051:       valid_constraints = temp_constraints;
6052:       if (!pcbddc->use_nnsp_true && temp_constraints) {
6053:         if (temp_constraints == 1) { /* just normalize the constraint */
6054:           PetscScalar norm, *ptr_to_data;

6056:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6057:           PetscBLASIntCast(size_of_constraint, &Blas_N);
6058:           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6059:           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6060:           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6061:         } else { /* perform SVD */
6062:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

6064:           if (use_pod) {
6065:             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6066:                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6067:                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6068:                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6069:                   from that computed using LAPACKgesvd
6070:                -> This is due to a different computation of eigenvectors in LAPACKheev
6071:                -> The quality of the POD-computed basis will be the same */
6072:             PetscArrayzero(correlation_mat, temp_constraints * temp_constraints);
6073:             /* Store upper triangular part of correlation matrix */
6074:             PetscBLASIntCast(size_of_constraint, &Blas_N);
6075:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6076:             for (j = 0; j < temp_constraints; j++) {
6077:               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6078:             }
6079:             /* compute eigenvalues and eigenvectors of correlation matrix */
6080:             PetscBLASIntCast(temp_constraints, &Blas_N);
6081:             PetscBLASIntCast(temp_constraints, &Blas_LDA);
6082: #if !defined(PETSC_USE_COMPLEX)
6083:             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6084: #else
6085:             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6086: #endif
6087:             PetscFPTrapPop();
6089:             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6090:             j = 0;
6091:             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6092:             total_counts      = total_counts - j;
6093:             valid_constraints = temp_constraints - j;
6094:             /* scale and copy POD basis into used quadrature memory */
6095:             PetscBLASIntCast(size_of_constraint, &Blas_M);
6096:             PetscBLASIntCast(temp_constraints, &Blas_N);
6097:             PetscBLASIntCast(temp_constraints, &Blas_K);
6098:             PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6099:             PetscBLASIntCast(temp_constraints, &Blas_LDB);
6100:             PetscBLASIntCast(size_of_constraint, &Blas_LDC);
6101:             if (j < temp_constraints) {
6102:               PetscInt ii;
6103:               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6104:               PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6105:               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6106:               PetscFPTrapPop();
6107:               for (k = 0; k < temp_constraints - j; k++) {
6108:                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6109:               }
6110:             }
6111:           } else {
6112: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6113:             PetscBLASIntCast(size_of_constraint, &Blas_M);
6114:             PetscBLASIntCast(temp_constraints, &Blas_N);
6115:             PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6116:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6117:   #if !defined(PETSC_USE_COMPLEX)
6118:             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6119:   #else
6120:             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6121:   #endif
6123:             PetscFPTrapPop();
6124:             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6125:             k = temp_constraints;
6126:             if (k > size_of_constraint) k = size_of_constraint;
6127:             j = 0;
6128:             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6129:             valid_constraints = k - j;
6130:             total_counts      = total_counts - temp_constraints + valid_constraints;
6131: #else
6132:             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6133: #endif /* on missing GESVD */
6134:           }
6135:         }
6136:       }
6137:       /* update pointers information */
6138:       if (valid_constraints) {
6139:         constraints_n[total_counts_cc]            = valid_constraints;
6140:         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6141:         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6142:         /* set change_of_basis flag */
6143:         if (boolforchange) PetscBTSet(change_basis, total_counts_cc);
6144:         total_counts_cc++;
6145:       }
6146:     }
6147:     /* free workspace */
6148:     if (!skip_lapack) {
6149:       PetscFree(work);
6150: #if defined(PETSC_USE_COMPLEX)
6151:       PetscFree(rwork);
6152: #endif
6153:       PetscFree(singular_vals);
6154:       PetscFree(correlation_mat);
6155:       PetscFree(temp_basis);
6156:     }
6157:     for (k = 0; k < nnsp_size; k++) VecDestroy(&localnearnullsp[k]);
6158:     PetscFree(localnearnullsp);
6159:     /* free index sets of faces, edges and vertices */
6160:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices);
6161:   } else {
6162:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6164:     total_counts = 0;
6165:     n_vertices   = 0;
6166:     if (sub_schurs->is_vertices && pcbddc->use_vertices) ISGetLocalSize(sub_schurs->is_vertices, &n_vertices);
6167:     max_constraints = 0;
6168:     total_counts_cc = 0;
6169:     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6170:       total_counts += pcbddc->adaptive_constraints_n[i];
6171:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6172:       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6173:     }
6174:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6175:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6176:     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6177:     constraints_data     = pcbddc->adaptive_constraints_data;
6178:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6179:     PetscMalloc1(total_counts_cc, &constraints_n);
6180:     total_counts_cc = 0;
6181:     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6182:       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6183:     }

6185:     max_size_of_constraint = 0;
6186:     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6187:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B);
6188:     /* Change of basis */
6189:     PetscBTCreate(total_counts_cc, &change_basis);
6190:     if (pcbddc->use_change_of_basis) {
6191:       for (i = 0; i < sub_schurs->n_subs; i++) {
6192:         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscBTSet(change_basis, i + n_vertices);
6193:       }
6194:     }
6195:   }
6196:   pcbddc->local_primal_size = total_counts;
6197:   PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs);

6199:   /* map constraints_idxs in boundary numbering */
6200:   if (pcbddc->use_change_of_basis) {
6201:     ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B);
6203:   }

6205:   /* Create constraint matrix */
6206:   MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix);
6207:   MatSetType(pcbddc->ConstraintMatrix, MATAIJ);
6208:   MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n);

6210:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6211:   /* determine if a QR strategy is needed for change of basis */
6212:   qr_needed = pcbddc->use_qr_single;
6213:   PetscBTCreate(total_counts_cc, &qr_needed_idx);
6214:   total_primal_vertices        = 0;
6215:   pcbddc->local_primal_size_cc = 0;
6216:   for (i = 0; i < total_counts_cc; i++) {
6217:     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6218:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6219:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6220:       pcbddc->local_primal_size_cc += 1;
6221:     } else if (PetscBTLookup(change_basis, i)) {
6222:       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6223:       pcbddc->local_primal_size_cc += constraints_n[i];
6224:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6225:         PetscBTSet(qr_needed_idx, i);
6226:         qr_needed = PETSC_TRUE;
6227:       }
6228:     } else {
6229:       pcbddc->local_primal_size_cc += 1;
6230:     }
6231:   }
6232:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6233:   pcbddc->n_vertices = total_primal_vertices;
6234:   /* permute indices in order to have a sorted set of vertices */
6235:   PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs);
6236:   PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult);
6237:   PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices);
6238:   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;

6240:   /* nonzero structure of constraint matrix */
6241:   /* and get reference dof for local constraints */
6242:   PetscMalloc1(pcbddc->local_primal_size, &nnz);
6243:   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;

6245:   j            = total_primal_vertices;
6246:   total_counts = total_primal_vertices;
6247:   cum          = total_primal_vertices;
6248:   for (i = n_vertices; i < total_counts_cc; i++) {
6249:     if (!PetscBTLookup(change_basis, i)) {
6250:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6251:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6252:       cum++;
6253:       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6254:       for (k = 0; k < constraints_n[i]; k++) {
6255:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6256:         nnz[j + k]                                        = size_of_constraint;
6257:       }
6258:       j += constraints_n[i];
6259:     }
6260:   }
6261:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz);
6262:   MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE);
6263:   PetscFree(nnz);

6265:   /* set values in constraint matrix */
6266:   for (i = 0; i < total_primal_vertices; i++) MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES);
6267:   total_counts = total_primal_vertices;
6268:   for (i = n_vertices; i < total_counts_cc; i++) {
6269:     if (!PetscBTLookup(change_basis, i)) {
6270:       PetscInt *cols;

6272:       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6273:       cols               = constraints_idxs + constraints_idxs_ptr[i];
6274:       for (k = 0; k < constraints_n[i]; k++) {
6275:         PetscInt     row = total_counts + k;
6276:         PetscScalar *vals;

6278:         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6279:         MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES);
6280:       }
6281:       total_counts += constraints_n[i];
6282:     }
6283:   }
6284:   /* assembling */
6285:   MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY);
6286:   MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY);
6287:   MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view");

6289:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6290:   if (pcbddc->use_change_of_basis) {
6291:     /* dual and primal dofs on a single cc */
6292:     PetscInt dual_dofs, primal_dofs;
6293:     /* working stuff for GEQRF */
6294:     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6295:     PetscBLASInt lqr_work;
6296:     /* working stuff for UNGQR */
6297:     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6298:     PetscBLASInt lgqr_work;
6299:     /* working stuff for TRTRS */
6300:     PetscScalar *trs_rhs = NULL;
6301:     PetscBLASInt Blas_NRHS;
6302:     /* pointers for values insertion into change of basis matrix */
6303:     PetscInt    *start_rows, *start_cols;
6304:     PetscScalar *start_vals;
6305:     /* working stuff for values insertion */
6306:     PetscBT   is_primal;
6307:     PetscInt *aux_primal_numbering_B;
6308:     /* matrix sizes */
6309:     PetscInt global_size, local_size;
6310:     /* temporary change of basis */
6311:     Mat localChangeOfBasisMatrix;
6312:     /* extra space for debugging */
6313:     PetscScalar *dbg_work = NULL;

6315:     MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix);
6316:     MatSetType(localChangeOfBasisMatrix, MATAIJ);
6317:     MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n);
6318:     /* nonzeros for local mat */
6319:     PetscMalloc1(pcis->n, &nnz);
6320:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6321:       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
6322:     } else {
6323:       const PetscInt *ii;
6324:       PetscInt        n;
6325:       PetscBool       flg_row;
6326:       MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row);
6327:       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
6328:       MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row);
6329:     }
6330:     for (i = n_vertices; i < total_counts_cc; i++) {
6331:       if (PetscBTLookup(change_basis, i)) {
6332:         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6333:         if (PetscBTLookup(qr_needed_idx, i)) {
6334:           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
6335:         } else {
6336:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6337:           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
6338:         }
6339:       }
6340:     }
6341:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz);
6342:     MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE);
6343:     PetscFree(nnz);
6344:     /* Set interior change in the matrix */
6345:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6346:       for (i = 0; i < pcis->n; i++) MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES);
6347:     } else {
6348:       const PetscInt *ii, *jj;
6349:       PetscScalar    *aa;
6350:       PetscInt        n;
6351:       PetscBool       flg_row;
6352:       MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row);
6353:       MatSeqAIJGetArray(pcbddc->benign_change, &aa);
6354:       for (i = 0; i < n; i++) MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES);
6355:       MatSeqAIJRestoreArray(pcbddc->benign_change, &aa);
6356:       MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row);
6357:     }

6359:     if (pcbddc->dbg_flag) {
6360:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n");
6361:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank);
6362:     }

6364:     /* Now we loop on the constraints which need a change of basis */
6365:     /*
6366:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6367:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

6369:        Basic blocks of change of basis matrix T computed:

6371:           - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

6373:             | 1        0   ...        0         s_1/S |
6374:             | 0        1   ...        0         s_2/S |
6375:             |              ...                        |
6376:             | 0        ...            1     s_{n-1}/S |
6377:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

6379:             with S = \sum_{i=1}^n s_i^2
6380:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6381:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

6383:           - QR decomposition of constraints otherwise
6384:     */
6385:     if (qr_needed && max_size_of_constraint) {
6386:       /* space to store Q */
6387:       PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis);
6388:       /* array to store scaling factors for reflectors */
6389:       PetscMalloc1(max_constraints, &qr_tau);
6390:       /* first we issue queries for optimal work */
6391:       PetscBLASIntCast(max_size_of_constraint, &Blas_M);
6392:       PetscBLASIntCast(max_constraints, &Blas_N);
6393:       PetscBLASIntCast(max_size_of_constraint, &Blas_LDA);
6394:       lqr_work = -1;
6395:       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
6397:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work);
6398:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t), &qr_work);
6399:       lgqr_work = -1;
6400:       PetscBLASIntCast(max_size_of_constraint, &Blas_M);
6401:       PetscBLASIntCast(max_size_of_constraint, &Blas_N);
6402:       PetscBLASIntCast(max_constraints, &Blas_K);
6403:       PetscBLASIntCast(max_size_of_constraint, &Blas_LDA);
6404:       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
6405:       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
6407:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work);
6408:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t), &gqr_work);
6409:       /* array to store rhs and solution of triangular solver */
6410:       PetscMalloc1(max_constraints * max_constraints, &trs_rhs);
6411:       /* allocating workspace for check */
6412:       if (pcbddc->dbg_flag) PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work);
6413:     }
6414:     /* array to store whether a node is primal or not */
6415:     PetscBTCreate(pcis->n_B, &is_primal);
6416:     PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B);
6417:     ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B);
6419:     for (i = 0; i < total_primal_vertices; i++) PetscBTSet(is_primal, aux_primal_numbering_B[i]);
6420:     PetscFree(aux_primal_numbering_B);

6422:     /* loop on constraints and see whether or not they need a change of basis and compute it */
6423:     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
6424:       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
6425:       if (PetscBTLookup(change_basis, total_counts)) {
6426:         /* get constraint info */
6427:         primal_dofs = constraints_n[total_counts];
6428:         dual_dofs   = size_of_constraint - primal_dofs;

6430:         if (pcbddc->dbg_flag) PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint);

6432:         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */

6434:           /* copy quadrature constraints for change of basis check */
6435:           if (pcbddc->dbg_flag) PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs);
6436:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6437:           PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs);

6439:           /* compute QR decomposition of constraints */
6440:           PetscBLASIntCast(size_of_constraint, &Blas_M);
6441:           PetscBLASIntCast(primal_dofs, &Blas_N);
6442:           PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6443:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6444:           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
6446:           PetscFPTrapPop();

6448:           /* explicitly compute R^-T */
6449:           PetscArrayzero(trs_rhs, primal_dofs * primal_dofs);
6450:           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
6451:           PetscBLASIntCast(primal_dofs, &Blas_N);
6452:           PetscBLASIntCast(primal_dofs, &Blas_NRHS);
6453:           PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6454:           PetscBLASIntCast(primal_dofs, &Blas_LDB);
6455:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6456:           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
6458:           PetscFPTrapPop();

6460:           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
6461:           PetscBLASIntCast(size_of_constraint, &Blas_M);
6462:           PetscBLASIntCast(size_of_constraint, &Blas_N);
6463:           PetscBLASIntCast(primal_dofs, &Blas_K);
6464:           PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6465:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6466:           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
6468:           PetscFPTrapPop();

6470:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6471:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6472:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6473:           PetscBLASIntCast(size_of_constraint, &Blas_M);
6474:           PetscBLASIntCast(primal_dofs, &Blas_N);
6475:           PetscBLASIntCast(primal_dofs, &Blas_K);
6476:           PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6477:           PetscBLASIntCast(primal_dofs, &Blas_LDB);
6478:           PetscBLASIntCast(size_of_constraint, &Blas_LDC);
6479:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6480:           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
6481:           PetscFPTrapPop();
6482:           PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs);

6484:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6485:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6486:           /* insert cols for primal dofs */
6487:           for (j = 0; j < primal_dofs; j++) {
6488:             start_vals = &qr_basis[j * size_of_constraint];
6489:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
6490:             MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES);
6491:           }
6492:           /* insert cols for dual dofs */
6493:           for (j = 0, k = 0; j < dual_dofs; k++) {
6494:             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
6495:               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
6496:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
6497:               MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES);
6498:               j++;
6499:             }
6500:           }

6502:           /* check change of basis */
6503:           if (pcbddc->dbg_flag) {
6504:             PetscInt  ii, jj;
6505:             PetscBool valid_qr = PETSC_TRUE;
6506:             PetscBLASIntCast(primal_dofs, &Blas_M);
6507:             PetscBLASIntCast(size_of_constraint, &Blas_N);
6508:             PetscBLASIntCast(size_of_constraint, &Blas_K);
6509:             PetscBLASIntCast(size_of_constraint, &Blas_LDA);
6510:             PetscBLASIntCast(size_of_constraint, &Blas_LDB);
6511:             PetscBLASIntCast(primal_dofs, &Blas_LDC);
6512:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6513:             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
6514:             PetscFPTrapPop();
6515:             for (jj = 0; jj < size_of_constraint; jj++) {
6516:               for (ii = 0; ii < primal_dofs; ii++) {
6517:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6518:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6519:               }
6520:             }
6521:             if (!valid_qr) {
6522:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n");
6523:               for (jj = 0; jj < size_of_constraint; jj++) {
6524:                 for (ii = 0; ii < primal_dofs; ii++) {
6525:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
6526:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]));
6527:                   }
6528:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
6529:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]));
6530:                   }
6531:                 }
6532:               }
6533:             } else {
6534:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n");
6535:             }
6536:           }
6537:         } else { /* simple transformation block */
6538:           PetscInt    row, col;
6539:           PetscScalar val, norm;

6541:           PetscBLASIntCast(size_of_constraint, &Blas_N);
6542:           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
6543:           for (j = 0; j < size_of_constraint; j++) {
6544:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
6545:             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
6546:             if (!PetscBTLookup(is_primal, row_B)) {
6547:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6548:               MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES);
6549:               MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES);
6550:             } else {
6551:               for (k = 0; k < size_of_constraint; k++) {
6552:                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
6553:                 if (row != col) {
6554:                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
6555:                 } else {
6556:                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
6557:                 }
6558:                 MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES);
6559:               }
6560:             }
6561:           }
6562:           if (pcbddc->dbg_flag) PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n");
6563:         }
6564:       } else {
6565:         if (pcbddc->dbg_flag) PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint);
6566:       }
6567:     }

6569:     /* free workspace */
6570:     if (qr_needed) {
6571:       if (pcbddc->dbg_flag) PetscFree(dbg_work);
6572:       PetscFree(trs_rhs);
6573:       PetscFree(qr_tau);
6574:       PetscFree(qr_work);
6575:       PetscFree(gqr_work);
6576:       PetscFree(qr_basis);
6577:     }
6578:     PetscBTDestroy(&is_primal);
6579:     MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY);
6580:     MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY);

6582:     /* assembling of global change of variable */
6583:     if (!pcbddc->fake_change) {
6584:       Mat      tmat;
6585:       PetscInt bs;

6587:       VecGetSize(pcis->vec1_global, &global_size);
6588:       VecGetLocalSize(pcis->vec1_global, &local_size);
6589:       MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat);
6590:       MatISSetLocalMat(tmat, localChangeOfBasisMatrix);
6591:       MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY);
6592:       MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY);
6593:       MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix);
6594:       MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ);
6595:       MatGetBlockSize(pc->pmat, &bs);
6596:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs);
6597:       MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size);
6598:       MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE);
6599:       MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix);
6600:       MatDestroy(&tmat);
6601:       VecSet(pcis->vec1_global, 0.0);
6602:       VecSet(pcis->vec1_N, 1.0);
6603:       VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
6604:       VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
6605:       VecReciprocal(pcis->vec1_global);
6606:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL);

6608:       /* check */
6609:       if (pcbddc->dbg_flag) {
6610:         PetscReal error;
6611:         Vec       x, x_change;

6613:         VecDuplicate(pcis->vec1_global, &x);
6614:         VecDuplicate(pcis->vec1_global, &x_change);
6615:         VecSetRandom(x, NULL);
6616:         VecCopy(x, pcis->vec1_global);
6617:         VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD);
6618:         VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD);
6619:         MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N);
6620:         VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE);
6621:         VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE);
6622:         MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change);
6623:         VecAXPY(x, -1.0, x_change);
6624:         VecNorm(x, NORM_INFINITY, &error);
6626:         VecDestroy(&x);
6627:         VecDestroy(&x_change);
6628:       }
6629:       /* adapt sub_schurs computed (if any) */
6630:       if (pcbddc->use_deluxe_scaling) {
6631:         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6634:         if (sub_schurs && sub_schurs->S_Ej_all) {
6635:           Mat S_new, tmat;
6636:           IS  is_all_N, is_V_Sall = NULL;

6638:           ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N);
6639:           MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat);
6640:           if (pcbddc->deluxe_zerorows) {
6641:             ISLocalToGlobalMapping NtoSall;
6642:             IS                     is_V;
6643:             ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V);
6644:             ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall);
6645:             ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall);
6646:             ISLocalToGlobalMappingDestroy(&NtoSall);
6647:             ISDestroy(&is_V);
6648:           }
6649:           ISDestroy(&is_all_N);
6650:           MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new);
6651:           MatDestroy(&sub_schurs->S_Ej_all);
6652:           PetscObjectReference((PetscObject)S_new);
6653:           if (pcbddc->deluxe_zerorows) {
6654:             const PetscScalar *array;
6655:             const PetscInt    *idxs_V, *idxs_all;
6656:             PetscInt           i, n_V;

6658:             MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL);
6659:             ISGetLocalSize(is_V_Sall, &n_V);
6660:             ISGetIndices(is_V_Sall, &idxs_V);
6661:             ISGetIndices(sub_schurs->is_Ej_all, &idxs_all);
6662:             VecGetArrayRead(pcis->D, &array);
6663:             for (i = 0; i < n_V; i++) {
6664:               PetscScalar val;
6665:               PetscInt    idx;

6667:               idx = idxs_V[i];
6668:               val = array[idxs_all[idxs_V[i]]];
6669:               MatSetValue(S_new, idx, idx, val, INSERT_VALUES);
6670:             }
6671:             MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY);
6672:             MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY);
6673:             VecRestoreArrayRead(pcis->D, &array);
6674:             ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all);
6675:             ISRestoreIndices(is_V_Sall, &idxs_V);
6676:           }
6677:           sub_schurs->S_Ej_all = S_new;
6678:           MatDestroy(&S_new);
6679:           if (sub_schurs->sum_S_Ej_all) {
6680:             MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new);
6681:             MatDestroy(&sub_schurs->sum_S_Ej_all);
6682:             PetscObjectReference((PetscObject)S_new);
6683:             if (pcbddc->deluxe_zerorows) MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL);
6684:             sub_schurs->sum_S_Ej_all = S_new;
6685:             MatDestroy(&S_new);
6686:           }
6687:           ISDestroy(&is_V_Sall);
6688:           MatDestroy(&tmat);
6689:         }
6690:         /* destroy any change of basis context in sub_schurs */
6691:         if (sub_schurs && sub_schurs->change) {
6692:           PetscInt i;

6694:           for (i = 0; i < sub_schurs->n_subs; i++) KSPDestroy(&sub_schurs->change[i]);
6695:           PetscFree(sub_schurs->change);
6696:         }
6697:       }
6698:       if (pcbddc->switch_static) { /* need to save the local change */
6699:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
6700:       } else {
6701:         MatDestroy(&localChangeOfBasisMatrix);
6702:       }
6703:       /* determine if any process has changed the pressures locally */
6704:       pcbddc->change_interior = pcbddc->benign_have_null;
6705:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6706:       MatDestroy(&pcbddc->ConstraintMatrix);
6707:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6708:       pcbddc->use_qr_single    = qr_needed;
6709:     }
6710:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6711:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6712:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6713:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6714:     } else {
6715:       Mat benign_global = NULL;
6716:       if (pcbddc->benign_have_null) {
6717:         Mat M;

6719:         pcbddc->change_interior = PETSC_TRUE;
6720:         VecCopy(matis->counter, pcis->vec1_N);
6721:         VecReciprocal(pcis->vec1_N);
6722:         MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global);
6723:         if (pcbddc->benign_change) {
6724:           MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M);
6725:           MatDiagonalScale(M, pcis->vec1_N, NULL);
6726:         } else {
6727:           MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M);
6728:           MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES);
6729:         }
6730:         MatISSetLocalMat(benign_global, M);
6731:         MatDestroy(&M);
6732:         MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY);
6733:         MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY);
6734:       }
6735:       if (pcbddc->user_ChangeOfBasisMatrix) {
6736:         MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix);
6737:         MatDestroy(&benign_global);
6738:       } else if (pcbddc->benign_have_null) {
6739:         pcbddc->ChangeOfBasisMatrix = benign_global;
6740:       }
6741:     }
6742:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6743:       IS              is_global;
6744:       const PetscInt *gidxs;

6746:       ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs);
6747:       ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global);
6748:       ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs);
6749:       MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change);
6750:       ISDestroy(&is_global);
6751:     }
6752:   }
6753:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) VecDuplicate(pcis->vec1_global, &pcbddc->work_change);

6755:   if (!pcbddc->fake_change) {
6756:     /* add pressure dofs to set of primal nodes for numbering purposes */
6757:     for (i = 0; i < pcbddc->benign_n; i++) {
6758:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
6759:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6760:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
6761:       pcbddc->local_primal_size_cc++;
6762:       pcbddc->local_primal_size++;
6763:     }

6765:     /* check if a new primal space has been introduced (also take into account benign trick) */
6766:     pcbddc->new_primal_space_local = PETSC_TRUE;
6767:     if (olocal_primal_size == pcbddc->local_primal_size) {
6768:       PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local);
6769:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6770:       if (!pcbddc->new_primal_space_local) {
6771:         PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local);
6772:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6773:       }
6774:     }
6775:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6776:     MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
6777:   }
6778:   PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult);

6780:   /* flush dbg viewer */
6781:   if (pcbddc->dbg_flag) PetscViewerFlush(pcbddc->dbg_viewer);

6783:   /* free workspace */
6784:   PetscBTDestroy(&qr_needed_idx);
6785:   PetscBTDestroy(&change_basis);
6786:   if (!pcbddc->adaptive_selection) {
6787:     PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n);
6788:     PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B);
6789:   } else {
6790:     PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data);
6791:     PetscFree(constraints_n);
6792:     PetscFree(constraints_idxs_B);
6793:   }
6794:   return 0;
6795: }

/*
  PCBDDCAnalyzeInterface - Builds (when requested) the local graph held in pcbddc->mat_graph
  and runs the connected-components analysis on it.

  Input Parameter:
. pc - the preconditioner context; pc->data is read as PC_BDDC and pc->pmat->data as Mat_IS

  Notes:
  If pcbddc->recompute_topography is set, the graph is reset, re-initialized with the
  local-to-global mapping of pc->pmat, optionally fed with the row adjacency of matis->A and
  with coordinates broadcast through matis->sf, and finally set up via PCBDDCGraphSetUp().
  The connected components are (re)computed only while pcbddc->graphanalyzed is false.

  As shown here the function always returns 0 and does not inspect the return value of any call.
*/
6797: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6798: {
6799:   ISLocalToGlobalMapping map;
6800:   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
6801:   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
6802:   PetscInt               i, N;
6803:   PetscBool              rcsr = PETSC_FALSE; /* becomes true when the default-adjacency branch below is entered; drives the nvtxs_csr reset at the end */

6805:   if (pcbddc->recompute_topography) {
6806:     pcbddc->graphanalyzed = PETSC_FALSE; /* guarantees the connected-components pass at the bottom runs again */
6807:     /* Reset previously computed graph */
6808:     PCBDDCGraphReset(pcbddc->mat_graph);
6809:     /* Init local Graph struct */
6810:     MatGetSize(pc->pmat, &N, NULL);
6811:     MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL);
6812:     PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount);

          /* only the local primal-vertex IS is processed, and only when no user_primal_vertices IS is set;
             NOTE(review): presumably this makes the local selection agree among sharing subdomains (MPI_LOR) - confirm against PCBDDCConsistencyCheckIS */
6814:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local);
6815:     /* Check validity of the csr graph passed in by the user */
6817:                pcbddc->mat_graph->nvtxs); /* NOTE(review): orphaned tail of a statement whose opening line (6816) is absent from this listing */

6819:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6820:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6821:       PetscInt *xadj, *adjncy;
6822:       PetscInt  nvtxs;
6823:       PetscBool flg_row = PETSC_FALSE;

            /* request the row structure of the local matrix with shift 0, symmetric = PETSC_TRUE, inodecompressed = PETSC_FALSE;
               flg_row reports whether the structure was actually provided */
6825:       MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row);
6826:       if (flg_row) {
              /* PETSC_COPY_VALUES: the graph is asked to copy the arrays, which are handed back to the matrix right below */
6827:         PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES);
6828:         pcbddc->computed_rowadj = PETSC_TRUE;
6829:       }
6830:       MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row);
6831:       rcsr = PETSC_TRUE; /* set regardless of flg_row */
6832:     }
6833:     if (pcbddc->dbg_flag) PetscViewerFlush(pcbddc->dbg_viewer);

          /* coordinates are present (cdim != 0) but not yet flagged as local (cloc is false):
             broadcast them through matis->sf into an array with cdim entries per local row of matis->A */
6835:     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
6836:       PetscReal   *lcoords;
6837:       PetscInt     n;
6838:       MPI_Datatype dimrealtype;

6840:       /* TODO: support for blocked */
6842:       MatGetLocalSize(matis->A, &n, NULL);
6843:       PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords);
            /* one MPI element = cdim contiguous reals, so each broadcast unit carries a whole coordinate tuple */
6844:       MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype);
6845:       MPI_Type_commit(&dimrealtype);
6846:       PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE);
6847:       PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE);
6848:       MPI_Type_free(&dimrealtype);
            /* the source array is released and replaced by the freshly filled one */
6849:       PetscFree(pcbddc->mat_graph->coords);

6851:       pcbddc->mat_graph->coords = lcoords;
6852:       pcbddc->mat_graph->cloc   = PETSC_TRUE;
6853:       pcbddc->mat_graph->cnloc  = n;
6854:     }
6856:                pcbddc->mat_graph->nvtxs); /* NOTE(review): orphaned tail of a statement whose opening line (6855) is absent from this listing */
          /* coordinates are marked active only if corner selection is requested, coordinates exist, and corners were not selected already */
6857:     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);

6859:     /* Setup of Graph */
6860:     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6861:     PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local);

6863:     /* attach info on disconnected subdomains if present */
6864:     if (pcbddc->n_local_subs) {
6865:       PetscInt *local_subs, n, totn;

6867:       MatGetLocalSize(matis->A, &n, NULL);
6868:       PetscMalloc1(n, &local_subs);
            /* default label n_local_subs marks dofs that are not listed in any of the pcbddc->local_subs index sets */
6869:       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
6870:       for (i = 0; i < pcbddc->n_local_subs; i++) {
6871:         const PetscInt *idxs;
6872:         PetscInt        nl, j;

6874:         ISGetLocalSize(pcbddc->local_subs[i], &nl);
6875:         ISGetIndices(pcbddc->local_subs[i], &idxs);
              /* label every dof of the i-th index set with i; indices are used unchecked as positions in local_subs */
6876:         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
6877:         ISRestoreIndices(pcbddc->local_subs[i], &idxs);
6878:       }
            /* largest label in use + 1: equals n_local_subs + 1 when some dof kept the default label */
6879:       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
6880:       pcbddc->mat_graph->n_local_subs = totn + 1;
6881:       pcbddc->mat_graph->local_subs   = local_subs; /* pointer is stored in the graph; it is not freed in this function */
6882:     }
6883:   }

6885:   if (!pcbddc->graphanalyzed) {
6886:     /* Graph's connected components analysis */
6887:     PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
6888:     pcbddc->graphanalyzed   = PETSC_TRUE;
6889:     pcbddc->corner_selected = pcbddc->corner_selection; /* record whether this analysis ran with corner selection enabled */
6890:   }
6891:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; /* zero the CSR vertex count when the default-adjacency branch above was taken */
6892:   return 0;
6893: }

6895: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
6896: {
6897:   PetscInt     i, j, n;
6898:   PetscScalar *alphas;
6899:   PetscReal    norm, *onorms;

6901:   n = *nio;
6902:   if (!n) return 0;
6903:   PetscMalloc2(n, &alphas, n, &onorms);
6904:   VecNormalize(vecs[0], &norm);
6905:   if (norm < PETSC_SMALL) {
6906:     onorms[0] = 0.0;
6907:     VecSet(vecs[0], 0.0);
6908:   } else {
6909:     onorms[0] = norm;
6910:   }

6912:   for (i = 1; i < n; i++) {
6913:     VecMDot(vecs[i], i, vecs, alphas);
6914:     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
6915:     VecMAXPY(vecs[i], i, alphas, vecs);
6916:     VecNormalize(vecs[i], &norm);
6917:     if (norm < PETSC_SMALL) {
6918:       onorms[i] = 0.0;
6919:       VecSet(vecs[i], 0.0);
6920:     } else {
6921:       onorms[i] = norm;
6922:     }
6923:   }
6924:   /* push nonzero vectors at the beginning */
6925:   for (i = 0; i < n; i++) {
6926:     if (onorms[i] == 0.0) {
6927:       for (j = i + 1; j < n; j++) {
6928:         if (onorms[j] != 0.0) {
6929:           VecCopy(vecs[j], vecs[i]);
6930:           onorms[j] = 0.0;
6931:         }
6932:       }
6933:     }
6934:   }
6935:   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
6936:   PetscFree2(alphas, onorms);
6937:   return 0;
6938: }

6940: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
6941: {
6942:   ISLocalToGlobalMapping mapping;
6943:   Mat                    A;
6944:   PetscInt               n_neighs, *neighs, *n_shared, **shared;
6945:   PetscMPIInt            size, rank, color;
6946:   PetscInt              *xadj, *adjncy;
6947:   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
6948:   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
6949:   PetscInt               void_procs, *procs_candidates = NULL;
6950:   PetscInt               xadj_count, *count;
6951:   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
6952:   PetscSubcomm           psubcomm;
6953:   MPI_Comm               subcomm;

6956:   PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis);

6962:   if (have_void) *have_void = PETSC_FALSE;
6963:   MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size);
6964:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank);
6965:   MatISGetLocalMat(mat, &A);
6966:   MatGetLocalSize(A, &n, NULL);
6967:   im_active = !!n;
6968:   MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat));
6969:   void_procs = size - active_procs;
6970:   /* get ranks of of non-active processes in mat communicator */
6971:   if (void_procs) {
6972:     PetscInt ncand;

6974:     if (have_void) *have_void = PETSC_TRUE;
6975:     PetscMalloc1(size, &procs_candidates);
6976:     MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat));
6977:     for (i = 0, ncand = 0; i < size; i++) {
6978:       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
6979:     }
6980:     /* force n_subdomains to be not greater that the number of non-active processes */
6981:     *n_subdomains = PetscMin(void_procs, *n_subdomains);
6982:   }

6984:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
6985:      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
6986:   MatGetSize(mat, &N, NULL);
6987:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
6988:     PetscInt issize, isidx, dest;
6989:     if (*n_subdomains == 1) dest = 0;
6990:     else dest = rank;
6991:     if (im_active) {
6992:       issize = 1;
6993:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
6994:         isidx = procs_candidates[dest];
6995:       } else {
6996:         isidx = dest;
6997:       }
6998:     } else {
6999:       issize = 0;
7000:       isidx  = -1;
7001:     }
7002:     if (*n_subdomains != 1) *n_subdomains = active_procs;
7003:     ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends);
7004:     PetscFree(procs_candidates);
7005:     return 0;
7006:   }
7007:   PetscOptionsGetBool(NULL, NULL, "-matis_partitioning_use_vwgt", &use_vwgt, NULL);
7008:   PetscOptionsGetInt(NULL, NULL, "-matis_partitioning_threshold", &threshold, NULL);
7009:   threshold = PetscMax(threshold, 2);

7011:   /* Get info on mapping */
7012:   MatISGetLocalToGlobalMapping(mat, &mapping, NULL);
7013:   ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared);

7015:   /* build local CSR graph of subdomains' connectivity */
7016:   PetscMalloc1(2, &xadj);
7017:   xadj[0] = 0;
7018:   xadj[1] = PetscMax(n_neighs - 1, 0);
7019:   PetscMalloc1(xadj[1], &adjncy);
7020:   PetscMalloc1(xadj[1], &adjncy_wgt);
7021:   PetscCalloc1(n, &count);
7022:   for (i = 1; i < n_neighs; i++)
7023:     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;

7025:   xadj_count = 0;
7026:   for (i = 1; i < n_neighs; i++) {
7027:     for (j = 0; j < n_shared[i]; j++) {
7028:       if (count[shared[i][j]] < threshold) {
7029:         adjncy[xadj_count]     = neighs[i];
7030:         adjncy_wgt[xadj_count] = n_shared[i];
7031:         xadj_count++;
7032:         break;
7033:       }
7034:     }
7035:   }
7036:   xadj[1] = xadj_count;
7037:   PetscFree(count);
7038:   ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared);
7039:   PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt);

7041:   PetscMalloc1(1, &ranks_send_to_idx);

7043:   /* Restrict work on active processes only */
7044:   PetscMPIIntCast(im_active, &color);
7045:   if (void_procs) {
7046:     PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm);
7047:     PetscSubcommSetNumber(psubcomm, 2); /* 2 groups, active process and not active processes */
7048:     PetscSubcommSetTypeGeneral(psubcomm, color, rank);
7049:     subcomm = PetscSubcommChild(psubcomm);
7050:   } else {
7051:     psubcomm = NULL;
7052:     subcomm  = PetscObjectComm((PetscObject)mat);
7053:   }

7055:   v_wgt = NULL;
7056:   if (!color) {
7057:     PetscFree(xadj);
7058:     PetscFree(adjncy);
7059:     PetscFree(adjncy_wgt);
7060:   } else {
7061:     Mat             subdomain_adj;
7062:     IS              new_ranks, new_ranks_contig;
7063:     MatPartitioning partitioner;
7064:     PetscInt        rstart = 0, rend = 0;
7065:     PetscInt       *is_indices, *oldranks;
7066:     PetscMPIInt     size;
7067:     PetscBool       aggregate;

7069:     MPI_Comm_size(subcomm, &size);
7070:     if (void_procs) {
7071:       PetscInt prank = rank;
7072:       PetscMalloc1(size, &oldranks);
7073:       MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm);
7074:       for (i = 0; i < xadj[1]; i++) PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]);
7075:       PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt);
7076:     } else {
7077:       oldranks = NULL;
7078:     }
7079:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7080:     if (aggregate) { /* TODO: all this part could be made more efficient */
7081:       PetscInt     lrows, row, ncols, *cols;
7082:       PetscMPIInt  nrank;
7083:       PetscScalar *vals;

7085:       MPI_Comm_rank(subcomm, &nrank);
7086:       lrows = 0;
7087:       if (nrank < redprocs) {
7088:         lrows = size / redprocs;
7089:         if (nrank < size % redprocs) lrows++;
7090:       }
7091:       MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj);
7092:       MatGetOwnershipRange(subdomain_adj, &rstart, &rend);
7093:       MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE);
7094:       MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
7095:       row   = nrank;
7096:       ncols = xadj[1] - xadj[0];
7097:       cols  = adjncy;
7098:       PetscMalloc1(ncols, &vals);
7099:       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7100:       MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES);
7101:       MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY);
7102:       MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY);
7103:       PetscFree(xadj);
7104:       PetscFree(adjncy);
7105:       PetscFree(adjncy_wgt);
7106:       PetscFree(vals);
7107:       if (use_vwgt) {
7108:         Vec                v;
7109:         const PetscScalar *array;
7110:         PetscInt           nl;

7112:         MatCreateVecs(subdomain_adj, &v, NULL);
7113:         VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES);
7114:         VecAssemblyBegin(v);
7115:         VecAssemblyEnd(v);
7116:         VecGetLocalSize(v, &nl);
7117:         VecGetArrayRead(v, &array);
7118:         PetscMalloc1(nl, &v_wgt);
7119:         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7120:         VecRestoreArrayRead(v, &array);
7121:         VecDestroy(&v);
7122:       }
7123:     } else {
7124:       MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj);
7125:       if (use_vwgt) {
7126:         PetscMalloc1(1, &v_wgt);
7127:         v_wgt[0] = n;
7128:       }
7129:     }
7130:     /* MatView(subdomain_adj,0); */

7132:     /* Partition */
7133:     MatPartitioningCreate(subcomm, &partitioner);
7134: #if defined(PETSC_HAVE_PTSCOTCH)
7135:     MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH);
7136: #elif defined(PETSC_HAVE_PARMETIS)
7137:     MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS);
7138: #else
7139:     MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE);
7140: #endif
7141:     MatPartitioningSetAdjacency(partitioner, subdomain_adj);
7142:     if (v_wgt) MatPartitioningSetVertexWeights(partitioner, v_wgt);
7143:     *n_subdomains = PetscMin((PetscInt)size, *n_subdomains);
7144:     MatPartitioningSetNParts(partitioner, *n_subdomains);
7145:     MatPartitioningSetFromOptions(partitioner);
7146:     MatPartitioningApply(partitioner, &new_ranks);
7147:     /* MatPartitioningView(partitioner,0); */

7149:     /* renumber new_ranks to avoid "holes" in new set of processors */
7150:     ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig);
7151:     ISDestroy(&new_ranks);
7152:     ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices);
7153:     if (!aggregate) {
7154:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7155:         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7156:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7157:       } else if (oldranks) {
7158:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7159:       } else {
7160:         ranks_send_to_idx[0] = is_indices[0];
7161:       }
7162:     } else {
7163:       PetscInt     idx = 0;
7164:       PetscMPIInt  tag;
7165:       MPI_Request *reqs;

7167:       PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag);
7168:       PetscMalloc1(rend - rstart, &reqs);
7169:       for (i = rstart; i < rend; i++) MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]);
7170:       MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE);
7171:       MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE);
7172:       PetscFree(reqs);
7173:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7174:         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7175:         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7176:       } else if (oldranks) {
7177:         ranks_send_to_idx[0] = oldranks[idx];
7178:       } else {
7179:         ranks_send_to_idx[0] = idx;
7180:       }
7181:     }
7182:     ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices);
7183:     /* clean up */
7184:     PetscFree(oldranks);
7185:     ISDestroy(&new_ranks_contig);
7186:     MatDestroy(&subdomain_adj);
7187:     MatPartitioningDestroy(&partitioner);
7188:   }
7189:   PetscSubcommDestroy(&psubcomm);
7190:   PetscFree(procs_candidates);

7192:   /* assemble parallel IS for sends */
7193:   i = 1;
7194:   if (!color) i = 0;
7195:   ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends);
7196:   return 0;
7197: }

/* Private integer tags naming a local matrix format; the numeric values are fixed
   explicitly because a tag is stored as a PetscInt in the index send buffer */
typedef enum {
  MATDENSE_PRIVATE = 0, /* dense */
  MATAIJ_PRIVATE   = 1, /* AIJ */
  MATBAIJ_PRIVATE  = 2, /* BAIJ */
  MATSBAIJ_PRIVATE = 3  /* SBAIJ */
} MatTypePrivate;

7206: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7207: {
7208:   Mat                    local_mat;
7209:   IS                     is_sends_internal;
7210:   PetscInt               rows, cols, new_local_rows;
7211:   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7212:   PetscBool              ismatis, isdense, newisdense, destroy_mat;
7213:   ISLocalToGlobalMapping l2gmap;
7214:   PetscInt              *l2gmap_indices;
7215:   const PetscInt        *is_indices;
7216:   MatType                new_local_type;
7217:   /* buffers */
7218:   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7219:   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7220:   PetscInt          *recv_buffer_idxs_local;
7221:   PetscScalar       *ptr_vals, *recv_buffer_vals;
7222:   const PetscScalar *send_buffer_vals;
7223:   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7224:   /* MPI */
7225:   MPI_Comm     comm, comm_n;
7226:   PetscSubcomm subcomm;
7227:   PetscMPIInt  n_sends, n_recvs, size;
7228:   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7229:   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7230:   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7231:   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7232:   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;

7235:   PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis);
7243:   if (nvecs) {
7246:   }
7247:   /* further checks */
7248:   MatISGetLocalMat(mat, &local_mat);
7249:   PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense);
7251:   MatGetSize(local_mat, &rows, &cols);
7253:   if (reuse && *mat_n) {
7254:     PetscInt mrows, mcols, mnrows, mncols;
7256:     PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis);
7258:     MatGetSize(mat, &mrows, &mcols);
7259:     MatGetSize(*mat_n, &mnrows, &mncols);
7262:   }
7263:   MatGetBlockSize(local_mat, &bs);

7266:   /* prepare IS for sending if not provided */
7267:   if (!is_sends) {
7269:     PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL);
7270:   } else {
7271:     PetscObjectReference((PetscObject)is_sends);
7272:     is_sends_internal = is_sends;
7273:   }

7275:   /* get comm */
7276:   PetscObjectGetComm((PetscObject)mat, &comm);

7278:   /* compute number of sends */
7279:   ISGetLocalSize(is_sends_internal, &i);
7280:   PetscMPIIntCast(i, &n_sends);

7282:   /* compute number of receives */
7283:   MPI_Comm_size(comm, &size);
7284:   PetscMalloc1(size, &iflags);
7285:   PetscArrayzero(iflags, size);
7286:   ISGetIndices(is_sends_internal, &is_indices);
7287:   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
7288:   PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs);
7289:   PetscFree(iflags);

7291:   /* restrict comm if requested */
7292:   subcomm     = NULL;
7293:   destroy_mat = PETSC_FALSE;
7294:   if (restrict_comm) {
7295:     PetscMPIInt color, subcommsize;

7297:     color = 0;
7298:     if (restrict_full) {
7299:       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7300:     } else {
7301:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7302:     }
7303:     MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm);
7304:     subcommsize = size - subcommsize;
7305:     /* check if reuse has been requested */
7306:     if (reuse) {
7307:       if (*mat_n) {
7308:         PetscMPIInt subcommsize2;
7309:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2);
7311:         comm_n = PetscObjectComm((PetscObject)*mat_n);
7312:       } else {
7313:         comm_n = PETSC_COMM_SELF;
7314:       }
7315:     } else { /* MAT_INITIAL_MATRIX */
7316:       PetscMPIInt rank;

7318:       MPI_Comm_rank(comm, &rank);
7319:       PetscSubcommCreate(comm, &subcomm);
7320:       PetscSubcommSetNumber(subcomm, 2);
7321:       PetscSubcommSetTypeGeneral(subcomm, color, rank);
7322:       comm_n = PetscSubcommChild(subcomm);
7323:     }
7324:     /* flag to destroy *mat_n if not significative */
7325:     if (color) destroy_mat = PETSC_TRUE;
7326:   } else {
7327:     comm_n = comm;
7328:   }

7330:   /* prepare send/receive buffers */
7331:   PetscMalloc1(size, &ilengths_idxs);
7332:   PetscArrayzero(ilengths_idxs, size);
7333:   PetscMalloc1(size, &ilengths_vals);
7334:   PetscArrayzero(ilengths_vals, size);
7335:   if (nis) PetscCalloc1(size, &ilengths_idxs_is);

7337:   /* Get data from local matrices */
7339:   /* TODO: See below some guidelines on how to prepare the local buffers */
7340:   /*
7341:        send_buffer_vals should contain the raw values of the local matrix
7342:        send_buffer_idxs should contain:
7343:        - MatType_PRIVATE type
7344:        - PetscInt        size_of_l2gmap
7345:        - PetscInt        global_row_indices[size_of_l2gmap]
7346:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7347:     */
7348:   {
7349:     ISLocalToGlobalMapping mapping;

7351:     MatISGetLocalToGlobalMapping(mat, &mapping, NULL);
7352:     MatDenseGetArrayRead(local_mat, &send_buffer_vals);
7353:     ISLocalToGlobalMappingGetSize(mapping, &i);
7354:     PetscMalloc1(i + 2, &send_buffer_idxs);
7355:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7356:     send_buffer_idxs[1] = i;
7357:     ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs);
7358:     PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i);
7359:     ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs);
7360:     PetscMPIIntCast(i, &len);
7361:     for (i = 0; i < n_sends; i++) {
7362:       ilengths_vals[is_indices[i]] = len * len;
7363:       ilengths_idxs[is_indices[i]] = len + 2;
7364:     }
7365:   }
7366:   PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals);
7367:   /* additional is (if any) */
7368:   if (nis) {
7369:     PetscMPIInt psum;
7370:     PetscInt    j;
7371:     for (j = 0, psum = 0; j < nis; j++) {
7372:       PetscInt plen;
7373:       ISGetLocalSize(isarray[j], &plen);
7374:       PetscMPIIntCast(plen, &len);
7375:       psum += len + 1; /* indices + length */
7376:     }
7377:     PetscMalloc1(psum, &send_buffer_idxs_is);
7378:     for (j = 0, psum = 0; j < nis; j++) {
7379:       PetscInt        plen;
7380:       const PetscInt *is_array_idxs;
7381:       ISGetLocalSize(isarray[j], &plen);
7382:       send_buffer_idxs_is[psum] = plen;
7383:       ISGetIndices(isarray[j], &is_array_idxs);
7384:       PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen);
7385:       ISRestoreIndices(isarray[j], &is_array_idxs);
7386:       psum += plen + 1; /* indices + length */
7387:     }
7388:     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
7389:     PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is);
7390:   }
7391:   MatISRestoreLocalMat(mat, &local_mat);

7393:   buf_size_idxs    = 0;
7394:   buf_size_vals    = 0;
7395:   buf_size_idxs_is = 0;
7396:   buf_size_vecs    = 0;
7397:   for (i = 0; i < n_recvs; i++) {
7398:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7399:     buf_size_vals += (PetscInt)olengths_vals[i];
7400:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7401:     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7402:   }
7403:   PetscMalloc1(buf_size_idxs, &recv_buffer_idxs);
7404:   PetscMalloc1(buf_size_vals, &recv_buffer_vals);
7405:   PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is);
7406:   PetscMalloc1(buf_size_vecs, &recv_buffer_vecs);

7408:   /* get new tags for clean communications */
7409:   PetscObjectGetNewTag((PetscObject)mat, &tag_idxs);
7410:   PetscObjectGetNewTag((PetscObject)mat, &tag_vals);
7411:   PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is);
7412:   PetscObjectGetNewTag((PetscObject)mat, &tag_vecs);

7414:   /* allocate for requests */
7415:   PetscMalloc1(n_sends, &send_req_idxs);
7416:   PetscMalloc1(n_sends, &send_req_vals);
7417:   PetscMalloc1(n_sends, &send_req_idxs_is);
7418:   PetscMalloc1(n_sends, &send_req_vecs);
7419:   PetscMalloc1(n_recvs, &recv_req_idxs);
7420:   PetscMalloc1(n_recvs, &recv_req_vals);
7421:   PetscMalloc1(n_recvs, &recv_req_idxs_is);
7422:   PetscMalloc1(n_recvs, &recv_req_vecs);

7424:   /* communications */
7425:   ptr_idxs    = recv_buffer_idxs;
7426:   ptr_vals    = recv_buffer_vals;
7427:   ptr_idxs_is = recv_buffer_idxs_is;
7428:   ptr_vecs    = recv_buffer_vecs;
7429:   for (i = 0; i < n_recvs; i++) {
7430:     source_dest = onodes[i];
7431:     MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i]);
7432:     MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i]);
7433:     ptr_idxs += olengths_idxs[i];
7434:     ptr_vals += olengths_vals[i];
7435:     if (nis) {
7436:       source_dest = onodes_is[i];
7437:       MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i]);
7438:       ptr_idxs_is += olengths_idxs_is[i];
7439:     }
7440:     if (nvecs) {
7441:       source_dest = onodes[i];
7442:       MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i]);
7443:       ptr_vecs += olengths_idxs[i] - 2;
7444:     }
7445:   }
7446:   for (i = 0; i < n_sends; i++) {
7447:     PetscMPIIntCast(is_indices[i], &source_dest);
7448:     MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]);
7449:     MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]);
7450:     if (nis) MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]);
7451:     if (nvecs) {
7452:       VecGetArray(nnsp_vec[0], &send_buffer_vecs);
7453:       MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]);
7454:     }
7455:   }
7456:   ISRestoreIndices(is_sends_internal, &is_indices);
7457:   ISDestroy(&is_sends_internal);

7459:   /* assemble new l2g map */
7460:   MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE);
7461:   ptr_idxs       = recv_buffer_idxs;
7462:   new_local_rows = 0;
7463:   for (i = 0; i < n_recvs; i++) {
7464:     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
7465:     ptr_idxs += olengths_idxs[i];
7466:   }
7467:   PetscMalloc1(new_local_rows, &l2gmap_indices);
7468:   ptr_idxs       = recv_buffer_idxs;
7469:   new_local_rows = 0;
7470:   for (i = 0; i < n_recvs; i++) {
7471:     PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1));
7472:     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
7473:     ptr_idxs += olengths_idxs[i];
7474:   }
7475:   PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices);
7476:   ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap);
7477:   PetscFree(l2gmap_indices);

7479:   /* infer new local matrix type from received local matrices type */
7480:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7481:   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
7482:   if (n_recvs) {
7483:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7484:     ptr_idxs                              = recv_buffer_idxs;
7485:     for (i = 0; i < n_recvs; i++) {
7486:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7487:         new_local_type_private = MATAIJ_PRIVATE;
7488:         break;
7489:       }
7490:       ptr_idxs += olengths_idxs[i];
7491:     }
7492:     switch (new_local_type_private) {
7493:     case MATDENSE_PRIVATE:
7494:       new_local_type = MATSEQAIJ;
7495:       bs             = 1;
7496:       break;
7497:     case MATAIJ_PRIVATE:
7498:       new_local_type = MATSEQAIJ;
7499:       bs             = 1;
7500:       break;
7501:     case MATBAIJ_PRIVATE:
7502:       new_local_type = MATSEQBAIJ;
7503:       break;
7504:     case MATSBAIJ_PRIVATE:
7505:       new_local_type = MATSEQSBAIJ;
7506:       break;
7507:     default:
7508:       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
7509:     }
7510:   } else { /* by default, new_local_type is seqaij */
7511:     new_local_type = MATSEQAIJ;
7512:     bs             = 1;
7513:   }

7515:   /* create MATIS object if needed */
7516:   if (!reuse) {
7517:     MatGetSize(mat, &rows, &cols);
7518:     MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n);
7519:   } else {
7520:     /* it also destroys the local matrices */
7521:     if (*mat_n) {
7522:       MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap);
7523:     } else { /* this is a fake object */
7524:       MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n);
7525:     }
7526:   }
7527:   MatISGetLocalMat(*mat_n, &local_mat);
7528:   MatSetType(local_mat, new_local_type);

7530:   MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE);

7532:   /* Global to local map of received indices */
7533:   PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local); /* needed for values insertion */
7534:   ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local);
7535:   ISLocalToGlobalMappingDestroy(&l2gmap);

7537:   /* restore attributes -> type of incoming data and its size */
7538:   buf_size_idxs = 0;
7539:   for (i = 0; i < n_recvs; i++) {
7540:     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
7541:     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
7542:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7543:   }
7544:   PetscFree(recv_buffer_idxs);

7546:   /* set preallocation */
7547:   PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense);
7548:   if (!newisdense) {
7549:     PetscInt *new_local_nnz = NULL;

7551:     ptr_idxs = recv_buffer_idxs_local;
7552:     if (n_recvs) PetscCalloc1(new_local_rows, &new_local_nnz);
7553:     for (i = 0; i < n_recvs; i++) {
7554:       PetscInt j;
7555:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7556:         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
7557:       } else {
7558:         /* TODO */
7559:       }
7560:       ptr_idxs += olengths_idxs[i];
7561:     }
7562:     if (new_local_nnz) {
7563:       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
7564:       MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz);
7565:       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
7566:       MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz);
7567:       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
7568:       MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz);
7569:     } else {
7570:       MatSetUp(local_mat);
7571:     }
7572:     PetscFree(new_local_nnz);
7573:   } else {
7574:     MatSetUp(local_mat);
7575:   }

7577:   /* set values */
7578:   ptr_vals = recv_buffer_vals;
7579:   ptr_idxs = recv_buffer_idxs_local;
7580:   for (i = 0; i < n_recvs; i++) {
7581:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7582:       MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE);
7583:       MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES);
7584:       MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY);
7585:       MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY);
7586:       MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE);
7587:     } else {
7588:       /* TODO */
7589:     }
7590:     ptr_idxs += olengths_idxs[i];
7591:     ptr_vals += olengths_vals[i];
7592:   }
7593:   MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY);
7594:   MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY);
7595:   MatISRestoreLocalMat(*mat_n, &local_mat);
7596:   MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY);
7597:   MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY);
7598:   PetscFree(recv_buffer_vals);

7600: #if 0
7601:   if (!restrict_comm) { /* check */
7602:     Vec       lvec,rvec;
7603:     PetscReal infty_error;

7605:     MatCreateVecs(mat,&rvec,&lvec);
7606:     VecSetRandom(rvec,NULL);
7607:     MatMult(mat,rvec,lvec);
7608:     VecScale(lvec,-1.0);
7609:     MatMultAdd(*mat_n,rvec,lvec,lvec);
7610:     VecNorm(lvec,NORM_INFINITY,&infty_error);
7611:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7612:     VecDestroy(&rvec);
7613:     VecDestroy(&lvec);
7614:   }
7615: #endif

7617:   /* assemble new additional is (if any) */
7618:   if (nis) {
7619:     PetscInt **temp_idxs, *count_is, j, psum;

7621:     MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE);
7622:     PetscCalloc1(nis, &count_is);
7623:     ptr_idxs = recv_buffer_idxs_is;
7624:     psum     = 0;
7625:     for (i = 0; i < n_recvs; i++) {
7626:       for (j = 0; j < nis; j++) {
7627:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7628:         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
7629:         psum += plen;
7630:         ptr_idxs += plen + 1; /* shift pointer to received data */
7631:       }
7632:     }
7633:     PetscMalloc1(nis, &temp_idxs);
7634:     PetscMalloc1(psum, &temp_idxs[0]);
7635:     for (i = 1; i < nis; i++) temp_idxs[i] = temp_idxs[i - 1] + count_is[i - 1];
7636:     PetscArrayzero(count_is, nis);
7637:     ptr_idxs = recv_buffer_idxs_is;
7638:     for (i = 0; i < n_recvs; i++) {
7639:       for (j = 0; j < nis; j++) {
7640:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7641:         PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen);
7642:         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
7643:         ptr_idxs += plen + 1; /* shift pointer to received data */
7644:       }
7645:     }
7646:     for (i = 0; i < nis; i++) {
7647:       ISDestroy(&isarray[i]);
7648:       PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]);
7649:       ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]);
7650:     }
7651:     PetscFree(count_is);
7652:     PetscFree(temp_idxs[0]);
7653:     PetscFree(temp_idxs);
7654:   }
7655:   /* free workspace */
7656:   PetscFree(recv_buffer_idxs_is);
7657:   MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE);
7658:   PetscFree(send_buffer_idxs);
7659:   MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE);
7660:   if (isdense) {
7661:     MatISGetLocalMat(mat, &local_mat);
7662:     MatDenseRestoreArrayRead(local_mat, &send_buffer_vals);
7663:     MatISRestoreLocalMat(mat, &local_mat);
7664:   } else {
7665:     /* PetscFree(send_buffer_vals); */
7666:   }
7667:   if (nis) {
7668:     MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE);
7669:     PetscFree(send_buffer_idxs_is);
7670:   }

7672:   if (nvecs) {
7673:     MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE);
7674:     MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE);
7675:     VecRestoreArray(nnsp_vec[0], &send_buffer_vecs);
7676:     VecDestroy(&nnsp_vec[0]);
7677:     VecCreate(comm_n, &nnsp_vec[0]);
7678:     VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE);
7679:     VecSetType(nnsp_vec[0], VECSTANDARD);
7680:     /* set values */
7681:     ptr_vals = recv_buffer_vecs;
7682:     ptr_idxs = recv_buffer_idxs_local;
7683:     VecGetArray(nnsp_vec[0], &send_buffer_vecs);
7684:     for (i = 0; i < n_recvs; i++) {
7685:       PetscInt j;
7686:       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
7687:       ptr_idxs += olengths_idxs[i];
7688:       ptr_vals += olengths_idxs[i] - 2;
7689:     }
7690:     VecRestoreArray(nnsp_vec[0], &send_buffer_vecs);
7691:     VecAssemblyBegin(nnsp_vec[0]);
7692:     VecAssemblyEnd(nnsp_vec[0]);
7693:   }

7695:   PetscFree(recv_buffer_vecs);
7696:   PetscFree(recv_buffer_idxs_local);
7697:   PetscFree(recv_req_idxs);
7698:   PetscFree(recv_req_vals);
7699:   PetscFree(recv_req_vecs);
7700:   PetscFree(recv_req_idxs_is);
7701:   PetscFree(send_req_idxs);
7702:   PetscFree(send_req_vals);
7703:   PetscFree(send_req_vecs);
7704:   PetscFree(send_req_idxs_is);
7705:   PetscFree(ilengths_vals);
7706:   PetscFree(ilengths_idxs);
7707:   PetscFree(olengths_vals);
7708:   PetscFree(olengths_idxs);
7709:   PetscFree(onodes);
7710:   if (nis) {
7711:     PetscFree(ilengths_idxs_is);
7712:     PetscFree(olengths_idxs_is);
7713:     PetscFree(onodes_is);
7714:   }
7715:   PetscSubcommDestroy(&subcomm);
7716:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
7717:     MatDestroy(mat_n);
7718:     for (i = 0; i < nis; i++) ISDestroy(&isarray[i]);
7719:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7720:       VecDestroy(&nnsp_vec[0]);
7721:     }
7722:     *mat_n = NULL;
7723:   }
7724:   return 0;
7725: }

7727: /* temporary hack into ksp private data structure */
7728: #include <petsc/private/kspimpl.h>

7730: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, PetscScalar *coarse_submat_vals)
7731: {
7732:   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7733:   PC_IS                 *pcis   = (PC_IS *)pc->data;
7734:   Mat                    coarse_mat, coarse_mat_is, coarse_submat_dense;
7735:   Mat                    coarsedivudotp = NULL;
7736:   Mat                    coarseG, t_coarse_mat_is;
7737:   MatNullSpace           CoarseNullSpace = NULL;
7738:   ISLocalToGlobalMapping coarse_islg;
7739:   IS                     coarse_is, *isarray, corners;
7740:   PetscInt               i, im_active = -1, active_procs = -1;
7741:   PetscInt               nis, nisdofs, nisneu, nisvert;
7742:   PetscInt               coarse_eqs_per_proc;
7743:   PC                     pc_temp;
7744:   PCType                 coarse_pc_type;
7745:   KSPType                coarse_ksp_type;
7746:   PetscBool              multilevel_requested, multilevel_allowed;
7747:   PetscBool              coarse_reuse;
7748:   PetscInt               ncoarse, nedcfield;
7749:   PetscBool              compute_vecs = PETSC_FALSE;
7750:   PetscScalar           *array;
7751:   MatReuse               coarse_mat_reuse;
7752:   PetscBool              restr, full_restr, have_void;
7753:   PetscMPIInt            size;

7755:   PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0);
7756:   /* Assign global numbering to coarse dofs */
7757:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7758:     PetscInt ocoarse_size;
7759:     compute_vecs = PETSC_TRUE;

7761:     pcbddc->new_primal_space = PETSC_TRUE;
7762:     ocoarse_size             = pcbddc->coarse_size;
7763:     PetscFree(pcbddc->global_primal_indices);
7764:     PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices);
7765:     /* see if we can avoid some work */
7766:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7767:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7768:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7769:         KSPReset(pcbddc->coarse_ksp);
7770:         coarse_reuse = PETSC_FALSE;
7771:       } else { /* we can safely reuse already computed coarse matrix */
7772:         coarse_reuse = PETSC_TRUE;
7773:       }
7774:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7775:       coarse_reuse = PETSC_FALSE;
7776:     }
7777:     /* reset any subassembling information */
7778:     if (!coarse_reuse || pcbddc->recompute_topography) ISDestroy(&pcbddc->coarse_subassembling);
7779:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
7780:     coarse_reuse = PETSC_TRUE;
7781:   }
7782:   if (coarse_reuse && pcbddc->coarse_ksp) {
7783:     KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL);
7784:     PetscObjectReference((PetscObject)coarse_mat);
7785:     coarse_mat_reuse = MAT_REUSE_MATRIX;
7786:   } else {
7787:     coarse_mat       = NULL;
7788:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
7789:   }

7791:   /* creates temporary l2gmap and IS for coarse indexes */
7792:   ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is);
7793:   ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg);

7795:   /* creates temporary MATIS object for coarse matrix */
7796:   MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_submat_dense);
7797:   MatCreateIS(PetscObjectComm((PetscObject)pc), 1, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size, coarse_islg, coarse_islg, &t_coarse_mat_is);
7798:   MatISSetLocalMat(t_coarse_mat_is, coarse_submat_dense);
7799:   MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY);
7800:   MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY);
7801:   MatDestroy(&coarse_submat_dense);

7803:   /* count "active" (i.e. with positive local size) and "void" processes */
7804:   im_active = !!(pcis->n);
7805:   MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));

7807:   /* determine number of processes participating in the coarse solver and compute subassembling pattern */
7808:   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
7809:   /* full_restr : just use the receivers from the subassembling pattern */
7810:   MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
7811:   coarse_mat_is        = NULL;
7812:   multilevel_allowed   = PETSC_FALSE;
7813:   multilevel_requested = PETSC_FALSE;
7814:   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
7815:   if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
7816:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7817:   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
7818:   if (multilevel_requested) {
7819:     ncoarse    = active_procs / pcbddc->coarsening_ratio;
7820:     restr      = PETSC_FALSE;
7821:     full_restr = PETSC_FALSE;
7822:   } else {
7823:     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
7824:     restr      = PETSC_TRUE;
7825:     full_restr = PETSC_TRUE;
7826:   }
7827:   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7828:   ncoarse = PetscMax(1, ncoarse);
7829:   if (!pcbddc->coarse_subassembling) {
7830:     if (pcbddc->coarsening_ratio > 1) {
7831:       if (multilevel_requested) {
7832:         PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void);
7833:       } else {
7834:         PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void);
7835:       }
7836:     } else {
7837:       PetscMPIInt rank;

7839:       MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
7840:       have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
7841:       ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling);
7842:     }
7843:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7844:     PetscInt psum;
7845:     if (pcbddc->coarse_ksp) psum = 1;
7846:     else psum = 0;
7847:     MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));
7848:     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
7849:   }
7850:   /* determine if we can go multilevel */
7851:   if (multilevel_requested) {
7852:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7853:     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
7854:   }
7855:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

7857:   /* dump subassembling pattern */
7858:   if (pcbddc->dbg_flag && multilevel_allowed) ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer);
7859:   /* compute dofs splitting and neumann boundaries for coarse dofs */
7860:   nedcfield = -1;
7861:   corners   = NULL;
7862:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
7863:     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
7864:     const PetscInt        *idxs;
7865:     ISLocalToGlobalMapping tmap;

7867:     /* create map between primal indices (in local representative ordering) and local primal numbering */
7868:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap);
7869:     /* allocate space for temporary storage */
7870:     PetscMalloc1(pcbddc->local_primal_size, &tidxs);
7871:     PetscMalloc1(pcbddc->local_primal_size, &tidxs2);
7872:     /* allocate for IS array */
7873:     nisdofs = pcbddc->n_ISForDofsLocal;
7874:     if (pcbddc->nedclocal) {
7875:       if (pcbddc->nedfield > -1) {
7876:         nedcfield = pcbddc->nedfield;
7877:       } else {
7878:         nedcfield = 0;
7880:         nisdofs = 1;
7881:       }
7882:     }
7883:     nisneu  = !!pcbddc->NeumannBoundariesLocal;
7884:     nisvert = 0; /* nisvert is not used */
7885:     nis     = nisdofs + nisneu + nisvert;
7886:     PetscMalloc1(nis, &isarray);
7887:     /* dofs splitting */
7888:     for (i = 0; i < nisdofs; i++) {
7889:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
7890:       if (nedcfield != i) {
7891:         ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize);
7892:         ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs);
7893:         ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs);
7894:         ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs);
7895:       } else {
7896:         ISGetLocalSize(pcbddc->nedclocal, &tsize);
7897:         ISGetIndices(pcbddc->nedclocal, &idxs);
7898:         ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs);
7900:         ISRestoreIndices(pcbddc->nedclocal, &idxs);
7901:       }
7902:       ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2);
7903:       ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]);
7904:       /* ISView(isarray[i],0); */
7905:     }
7906:     /* neumann boundaries */
7907:     if (pcbddc->NeumannBoundariesLocal) {
7908:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
7909:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize);
7910:       ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs);
7911:       ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs);
7912:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs);
7913:       ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2);
7914:       ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]);
7915:       /* ISView(isarray[nisdofs],0); */
7916:     }
7917:     /* coordinates */
7918:     if (pcbddc->corner_selected) {
7919:       PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners);
7920:       ISGetLocalSize(corners, &tsize);
7921:       ISGetIndices(corners, &idxs);
7922:       ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs);
7924:       ISRestoreIndices(corners, &idxs);
7925:       PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners);
7926:       ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2);
7927:       ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners);
7928:     }
7929:     PetscFree(tidxs);
7930:     PetscFree(tidxs2);
7931:     ISLocalToGlobalMappingDestroy(&tmap);
7932:   } else {
7933:     nis     = 0;
7934:     nisdofs = 0;
7935:     nisneu  = 0;
7936:     nisvert = 0;
7937:     isarray = NULL;
7938:   }
7939:   /* destroy no longer needed map */
7940:   ISLocalToGlobalMappingDestroy(&coarse_islg);

7942:   /* subassemble */
7943:   if (multilevel_allowed) {
7944:     Vec       vp[1];
7945:     PetscInt  nvecs = 0;
7946:     PetscBool reuse, reuser;

7948:     if (coarse_mat) reuse = PETSC_TRUE;
7949:     else reuse = PETSC_FALSE;
7950:     MPIU_Allreduce(&reuse, &reuser, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
7951:     vp[0] = NULL;
7952:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
7953:       VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]);
7954:       VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE);
7955:       VecSetType(vp[0], VECSTANDARD);
7956:       nvecs = 1;

7958:       if (pcbddc->divudotp) {
7959:         Mat      B, loc_divudotp;
7960:         Vec      v, p;
7961:         IS       dummy;
7962:         PetscInt np;

7964:         MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp);
7965:         MatGetSize(loc_divudotp, &np, NULL);
7966:         ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy);
7967:         MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B);
7968:         MatCreateVecs(B, &v, &p);
7969:         VecSet(p, 1.);
7970:         MatMultTranspose(B, p, v);
7971:         VecDestroy(&p);
7972:         MatDestroy(&B);
7973:         VecGetArray(vp[0], &array);
7974:         VecPlaceArray(pcbddc->vec1_P, array);
7975:         MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P);
7976:         VecResetArray(pcbddc->vec1_P);
7977:         VecRestoreArray(vp[0], &array);
7978:         ISDestroy(&dummy);
7979:         VecDestroy(&v);
7980:       }
7981:     }
7982:     if (reuser) {
7983:       PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp);
7984:     } else {
7985:       PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp);
7986:     }
7987:     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
7988:       PetscScalar       *arraym;
7989:       const PetscScalar *arrayv;
7990:       PetscInt           nl;
7991:       VecGetLocalSize(vp[0], &nl);
7992:       MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp);
7993:       MatDenseGetArray(coarsedivudotp, &arraym);
7994:       VecGetArrayRead(vp[0], &arrayv);
7995:       PetscArraycpy(arraym, arrayv, nl);
7996:       VecRestoreArrayRead(vp[0], &arrayv);
7997:       MatDenseRestoreArray(coarsedivudotp, &arraym);
7998:       VecDestroy(&vp[0]);
7999:     } else {
8000:       MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp);
8001:     }
8002:   } else {
8003:     PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL);
8004:   }
8005:   if (coarse_mat_is || coarse_mat) {
8006:     if (!multilevel_allowed) {
8007:       MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat);
8008:     } else {
8009:       /* if this matrix is present, it means we are not reusing the coarse matrix */
8010:       if (coarse_mat_is) {
8012:         PetscObjectReference((PetscObject)coarse_mat_is);
8013:         coarse_mat = coarse_mat_is;
8014:       }
8015:     }
8016:   }
8017:   MatDestroy(&t_coarse_mat_is);
8018:   MatDestroy(&coarse_mat_is);

8020:   /* create local to global scatters for coarse problem */
8021:   if (compute_vecs) {
8022:     PetscInt lrows;
8023:     VecDestroy(&pcbddc->coarse_vec);
8024:     if (coarse_mat) {
8025:       MatGetLocalSize(coarse_mat, &lrows, NULL);
8026:     } else {
8027:       lrows = 0;
8028:     }
8029:     VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec);
8030:     VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE);
8031:     VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8032:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8033:     VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob);
8034:   }
8035:   ISDestroy(&coarse_is);

8037:   /* set defaults for coarse KSP and PC */
8038:   if (multilevel_allowed) {
8039:     coarse_ksp_type = KSPRICHARDSON;
8040:     coarse_pc_type  = PCBDDC;
8041:   } else {
8042:     coarse_ksp_type = KSPPREONLY;
8043:     coarse_pc_type  = PCREDUNDANT;
8044:   }

8046:   /* print some info if requested */
8047:   if (pcbddc->dbg_flag) {
8048:     if (!multilevel_allowed) {
8049:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
8050:       if (multilevel_requested) {
8051:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, pcbddc->coarsening_ratio);
8052:       } else if (pcbddc->max_levels) {
8053:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels);
8054:       }
8055:       PetscViewerFlush(pcbddc->dbg_viewer);
8056:     }
8057:   }

8059:   /* communicate coarse discrete gradient */
8060:   coarseG = NULL;
8061:   if (pcbddc->nedcG && multilevel_allowed) {
8062:     MPI_Comm ccomm;
8063:     if (coarse_mat) {
8064:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8065:     } else {
8066:       ccomm = MPI_COMM_NULL;
8067:     }
8068:     MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG);
8069:   }

8071:   /* create the coarse KSP object only once with defaults */
8072:   if (coarse_mat) {
8073:     PetscBool   isredundant, isbddc, force, valid;
8074:     PetscViewer dbg_viewer = NULL;
8075:     PetscBool   isset, issym, isher, isspd;

8077:     if (pcbddc->dbg_flag) {
8078:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8079:       PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level);
8080:     }
8081:     if (!pcbddc->coarse_ksp) {
8082:       char   prefix[256], str_level[16];
8083:       size_t len;

8085:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp);
8086:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure);
8087:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1);
8088:       KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1);
8089:       KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat);
8090:       KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type);
8091:       KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE);
8092:       KSPGetPC(pcbddc->coarse_ksp, &pc_temp);
8093:       /* TODO is this logic correct? should check for coarse_mat type */
8094:       PCSetType(pc_temp, coarse_pc_type);
8095:       /* prefix */
8096:       PetscStrcpy(prefix, "");
8097:       PetscStrcpy(str_level, "");
8098:       if (!pcbddc->current_level) {
8099:         PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix));
8100:         PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix));
8101:       } else {
8102:         PetscStrlen(((PetscObject)pc)->prefix, &len);
8103:         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8104:         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8105:         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8106:         PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1);
8107:         PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level));
8108:         PetscStrlcat(prefix, str_level, sizeof(prefix));
8109:       }
8110:       KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix);
8111:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8112:       PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1);
8113:       PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio);
8114:       PCBDDCSetLevels(pc_temp, pcbddc->max_levels);
8115:       /* allow user customization */
8116:       KSPSetFromOptions(pcbddc->coarse_ksp);
8117:       /* get some info after set from options */
8118:       KSPGetPC(pcbddc->coarse_ksp, &pc_temp);
8119:       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8120:       force = PETSC_FALSE;
8121:       PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL);
8122:       PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "");
8123:       PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc);
8124:       if (multilevel_allowed && !force && !valid) {
8125:         isbddc = PETSC_TRUE;
8126:         PCSetType(pc_temp, PCBDDC);
8127:         PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1);
8128:         PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio);
8129:         PCBDDCSetLevels(pc_temp, pcbddc->max_levels);
8130:         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8131:           PetscObjectOptionsBegin((PetscObject)pc_temp);
8132:           (*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject);
8133:           PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject);
8134:           PetscOptionsEnd();
8135:           pc_temp->setfromoptionscalled++;
8136:         }
8137:       }
8138:     }
8139:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8140:     KSPGetPC(pcbddc->coarse_ksp, &pc_temp);
8141:     if (nisdofs) {
8142:       PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray);
8143:       for (i = 0; i < nisdofs; i++) ISDestroy(&isarray[i]);
8144:     }
8145:     if (nisneu) {
8146:       PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]);
8147:       ISDestroy(&isarray[nisdofs]);
8148:     }
8149:     if (nisvert) {
8150:       PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]);
8151:       ISDestroy(&isarray[nis - 1]);
8152:     }
8153:     if (coarseG) PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE);

8155:     /* get some info after set from options */
8156:     PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc);

8158:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8159:     if (isbddc && !multilevel_allowed) PCSetType(pc_temp, coarse_pc_type);
8160:     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8161:     force = PETSC_FALSE;
8162:     PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL);
8163:     PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "");
8164:     if (multilevel_requested && multilevel_allowed && !valid && !force) PCSetType(pc_temp, PCBDDC);
8165:     PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant);
8166:     if (isredundant) {
8167:       KSP inner_ksp;
8168:       PC  inner_pc;

8170:       PCRedundantGetKSP(pc_temp, &inner_ksp);
8171:       KSPGetPC(inner_ksp, &inner_pc);
8172:     }

8174:     /* parameters which miss an API */
8175:     PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc);
8176:     if (isbddc) {
8177:       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;

8179:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8180:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8181:       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8182:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8183:       if (pcbddc_coarse->benign_saddle_point) {
8184:         Mat                    coarsedivudotp_is;
8185:         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8186:         IS                     row, col;
8187:         const PetscInt        *gidxs;
8188:         PetscInt               n, st, M, N;

8190:         MatGetSize(coarsedivudotp, &n, NULL);
8191:         MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat));
8192:         st = st - n;
8193:         ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row);
8194:         MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL);
8195:         ISLocalToGlobalMappingGetSize(l2gmap, &n);
8196:         ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs);
8197:         ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col);
8198:         ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs);
8199:         ISLocalToGlobalMappingCreateIS(row, &rl2g);
8200:         ISLocalToGlobalMappingCreateIS(col, &cl2g);
8201:         ISGetSize(row, &M);
8202:         MatGetSize(coarse_mat, &N, NULL);
8203:         ISDestroy(&row);
8204:         ISDestroy(&col);
8205:         MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is);
8206:         MatSetType(coarsedivudotp_is, MATIS);
8207:         MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N);
8208:         MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g);
8209:         ISLocalToGlobalMappingDestroy(&rl2g);
8210:         ISLocalToGlobalMappingDestroy(&cl2g);
8211:         MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp);
8212:         MatDestroy(&coarsedivudotp);
8213:         PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL);
8214:         MatDestroy(&coarsedivudotp_is);
8215:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8216:         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8217:       }
8218:     }

8220:     /* propagate symmetry info of coarse matrix */
8221:     MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE);
8222:     MatIsSymmetricKnown(pc->pmat, &isset, &issym);
8223:     if (isset) MatSetOption(coarse_mat, MAT_SYMMETRIC, issym);
8224:     MatIsHermitianKnown(pc->pmat, &isset, &isher);
8225:     if (isset) MatSetOption(coarse_mat, MAT_HERMITIAN, isher);
8226:     MatIsSPDKnown(pc->pmat, &isset, &isspd);
8227:     if (isset) MatSetOption(coarse_mat, MAT_SPD, isspd);

8229:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE);
8230:     /* set operators */
8231:     MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view");
8232:     MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix);
8233:     KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat);
8234:     if (pcbddc->dbg_flag) PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level);
8235:   }
8236:   MatDestroy(&coarseG);
8237:   PetscFree(isarray);
8238: #if 0
8239:   {
8240:     PetscViewer viewer;
8241:     char filename[256];
8242:     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8243:     PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8244:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8245:     MatView(coarse_mat,viewer);
8246:     PetscViewerPopFormat(viewer);
8247:     PetscViewerDestroy(&viewer);
8248:   }
8249: #endif

8251:   if (corners) {
8252:     Vec             gv;
8253:     IS              is;
8254:     const PetscInt *idxs;
8255:     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8256:     PetscScalar    *coords;

8259:     VecGetSize(pcbddc->coarse_vec, &N);
8260:     VecGetLocalSize(pcbddc->coarse_vec, &n);
8261:     VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv);
8262:     VecSetBlockSize(gv, cdim);
8263:     VecSetSizes(gv, n * cdim, N * cdim);
8264:     VecSetType(gv, VECSTANDARD);
8265:     VecSetFromOptions(gv);
8266:     VecSet(gv, PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */

8268:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is);
8269:     ISGetLocalSize(is, &n);
8270:     ISGetIndices(is, &idxs);
8271:     PetscMalloc1(n * cdim, &coords);
8272:     for (i = 0; i < n; i++) {
8273:       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
8274:     }
8275:     ISRestoreIndices(is, &idxs);
8276:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is);

8278:     ISGetLocalSize(corners, &n);
8279:     ISGetIndices(corners, &idxs);
8280:     VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES);
8281:     ISRestoreIndices(corners, &idxs);
8282:     PetscFree(coords);
8283:     VecAssemblyBegin(gv);
8284:     VecAssemblyEnd(gv);
8285:     VecGetArray(gv, &coords);
8286:     if (pcbddc->coarse_ksp) {
8287:       PC        coarse_pc;
8288:       PetscBool isbddc;

8290:       KSPGetPC(pcbddc->coarse_ksp, &coarse_pc);
8291:       PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc);
8292:       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8293:         PetscReal *realcoords;

8295:         VecGetLocalSize(gv, &n);
8296: #if defined(PETSC_USE_COMPLEX)
8297:         PetscMalloc1(n, &realcoords);
8298:         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
8299: #else
8300:         realcoords = coords;
8301: #endif
8302:         PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords);
8303: #if defined(PETSC_USE_COMPLEX)
8304:         PetscFree(realcoords);
8305: #endif
8306:       }
8307:     }
8308:     VecRestoreArray(gv, &coords);
8309:     VecDestroy(&gv);
8310:   }
8311:   ISDestroy(&corners);

8313:   if (pcbddc->coarse_ksp) {
8314:     Vec crhs, csol;

8316:     KSPGetSolution(pcbddc->coarse_ksp, &csol);
8317:     KSPGetRhs(pcbddc->coarse_ksp, &crhs);
8318:     if (!csol) MatCreateVecs(coarse_mat, &((pcbddc->coarse_ksp)->vec_sol), NULL);
8319:     if (!crhs) MatCreateVecs(coarse_mat, NULL, &((pcbddc->coarse_ksp)->vec_rhs));
8320:   }
8321:   MatDestroy(&coarsedivudotp);

8323:   /* compute null space for coarse solver if the benign trick has been requested */
8324:   if (pcbddc->benign_null) {
8325:     VecSet(pcbddc->vec1_P, 0.);
8326:     for (i = 0; i < pcbddc->benign_n; i++) VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES);
8327:     VecAssemblyBegin(pcbddc->vec1_P);
8328:     VecAssemblyEnd(pcbddc->vec1_P);
8329:     VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD);
8330:     VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD);
8331:     if (coarse_mat) {
8332:       Vec          nullv;
8333:       PetscScalar *array, *array2;
8334:       PetscInt     nl;

8336:       MatCreateVecs(coarse_mat, &nullv, NULL);
8337:       VecGetLocalSize(nullv, &nl);
8338:       VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array);
8339:       VecGetArray(nullv, &array2);
8340:       PetscArraycpy(array2, array, nl);
8341:       VecRestoreArray(nullv, &array2);
8342:       VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array);
8343:       VecNormalize(nullv, NULL);
8344:       MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace);
8345:       VecDestroy(&nullv);
8346:     }
8347:   }
8348:   PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0);

8350:   PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0);
8351:   if (pcbddc->coarse_ksp) {
8352:     PetscBool ispreonly;

8354:     if (CoarseNullSpace) {
8355:       PetscBool isnull;

8357:       MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull);
8358:       if (isnull) MatSetNullSpace(coarse_mat, CoarseNullSpace);
8359:       /* TODO: add local nullspaces (if any) */
8360:     }
8361:     /* setup coarse ksp */
8362:     KSPSetUp(pcbddc->coarse_ksp);
8363:     /* Check coarse problem if in debug mode or if solving with an iterative method */
8364:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly);
8365:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
8366:       KSP         check_ksp;
8367:       KSPType     check_ksp_type;
8368:       PC          check_pc;
8369:       Vec         check_vec, coarse_vec;
8370:       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
8371:       PetscInt    its;
8372:       PetscBool   compute_eigs;
8373:       PetscReal  *eigs_r, *eigs_c;
8374:       PetscInt    neigs;
8375:       const char *prefix;

8377:       /* Create ksp object suitable for estimation of extreme eigenvalues */
8378:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp);
8379:       PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0);
8380:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE);
8381:       KSPSetOperators(check_ksp, coarse_mat, coarse_mat);
8382:       KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size);
8383:       /* prevent from setup unneeded object */
8384:       KSPGetPC(check_ksp, &check_pc);
8385:       PCSetType(check_pc, PCNONE);
8386:       if (ispreonly) {
8387:         check_ksp_type = KSPPREONLY;
8388:         compute_eigs   = PETSC_FALSE;
8389:       } else {
8390:         check_ksp_type = KSPGMRES;
8391:         compute_eigs   = PETSC_TRUE;
8392:       }
8393:       KSPSetType(check_ksp, check_ksp_type);
8394:       KSPSetComputeSingularValues(check_ksp, compute_eigs);
8395:       KSPSetComputeEigenvalues(check_ksp, compute_eigs);
8396:       KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1);
8397:       KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix);
8398:       KSPSetOptionsPrefix(check_ksp, prefix);
8399:       KSPAppendOptionsPrefix(check_ksp, "check_");
8400:       KSPSetFromOptions(check_ksp);
8401:       KSPSetUp(check_ksp);
8402:       KSPGetPC(pcbddc->coarse_ksp, &check_pc);
8403:       KSPSetPC(check_ksp, check_pc);
8404:       /* create random vec */
8405:       MatCreateVecs(coarse_mat, &coarse_vec, &check_vec);
8406:       VecSetRandom(check_vec, NULL);
8407:       MatMult(coarse_mat, check_vec, coarse_vec);
8408:       /* solve coarse problem */
8409:       KSPSolve(check_ksp, coarse_vec, coarse_vec);
8410:       KSPCheckSolve(check_ksp, pc, coarse_vec);
8411:       /* set eigenvalue estimation if preonly has not been requested */
8412:       if (compute_eigs) {
8413:         PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r);
8414:         PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c);
8415:         KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs);
8416:         if (neigs) {
8417:           lambda_max = eigs_r[neigs - 1];
8418:           lambda_min = eigs_r[0];
8419:           if (pcbddc->use_coarse_estimates) {
8420:             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8421:               KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min);
8422:               KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min));
8423:             }
8424:           }
8425:         }
8426:       }

8428:       /* check coarse problem residual error */
8429:       if (pcbddc->dbg_flag) {
8430:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8431:         PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1));
8432:         VecAXPY(check_vec, -1.0, coarse_vec);
8433:         VecNorm(check_vec, NORM_INFINITY, &infty_error);
8434:         MatMult(coarse_mat, check_vec, coarse_vec);
8435:         VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error);
8436:         PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates);
8437:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp), dbg_viewer);
8438:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer);
8439:         PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error);
8440:         PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error);
8441:         if (CoarseNullSpace) PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n");
8442:         if (compute_eigs) {
8443:           PetscReal          lambda_max_s, lambda_min_s;
8444:           KSPConvergedReason reason;
8445:           KSPGetType(check_ksp, &check_ksp_type);
8446:           KSPGetIterationNumber(check_ksp, &its);
8447:           KSPGetConvergedReason(check_ksp, &reason);
8448:           KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s);
8449:           PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s);
8450:           for (i = 0; i < neigs; i++) PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]);
8451:         }
8452:         PetscViewerFlush(dbg_viewer);
8453:         PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1));
8454:       }
8455:       VecDestroy(&check_vec);
8456:       VecDestroy(&coarse_vec);
8457:       KSPDestroy(&check_ksp);
8458:       if (compute_eigs) {
8459:         PetscFree(eigs_r);
8460:         PetscFree(eigs_c);
8461:       }
8462:     }
8463:   }
8464:   MatNullSpaceDestroy(&CoarseNullSpace);
8465:   /* print additional info */
8466:   if (pcbddc->dbg_flag) {
8467:     /* wait until all processes reach this point */
8468:     PetscBarrier((PetscObject)pc);
8469:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level);
8470:     PetscViewerFlush(pcbddc->dbg_viewer);
8471:   }

8473:   /* free memory */
8474:   MatDestroy(&coarse_mat);
8475:   PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0);
8476:   return 0;
8477: }

8479: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
8480: {
8481:   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
8482:   PC_IS          *pcis   = (PC_IS *)pc->data;
8483:   Mat_IS         *matis  = (Mat_IS *)pc->pmat->data;
8484:   IS              subset, subset_mult, subset_n;
8485:   PetscInt        local_size, coarse_size = 0;
8486:   PetscInt       *local_primal_indices = NULL;
8487:   const PetscInt *t_local_primal_indices;

8489:   /* Compute global number of coarse dofs */
8491:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n);
8492:   ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset);
8493:   ISDestroy(&subset_n);
8494:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult);
8495:   ISRenumber(subset, subset_mult, &coarse_size, &subset_n);
8496:   ISDestroy(&subset);
8497:   ISDestroy(&subset_mult);
8498:   ISGetLocalSize(subset_n, &local_size);
8500:   PetscMalloc1(local_size, &local_primal_indices);
8501:   ISGetIndices(subset_n, &t_local_primal_indices);
8502:   PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size);
8503:   ISRestoreIndices(subset_n, &t_local_primal_indices);
8504:   ISDestroy(&subset_n);

8506:   /* check numbering */
8507:   if (pcbddc->dbg_flag) {
8508:     PetscScalar coarsesum, *array, *array2;
8509:     PetscInt    i;
8510:     PetscBool   set_error = PETSC_FALSE, set_error_reduced = PETSC_FALSE;

8512:     PetscViewerFlush(pcbddc->dbg_viewer);
8513:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n");
8514:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse indices\n");
8515:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8516:     /* counter */
8517:     VecSet(pcis->vec1_global, 0.0);
8518:     VecSet(pcis->vec1_N, 1.0);
8519:     VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8520:     VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8521:     VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD);
8522:     VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD);
8523:     VecSet(pcis->vec1_N, 0.0);
8524:     for (i = 0; i < pcbddc->local_primal_size; i++) VecSetValue(pcis->vec1_N, pcbddc->primal_indices_local_idxs[i], 1.0, INSERT_VALUES);
8525:     VecAssemblyBegin(pcis->vec1_N);
8526:     VecAssemblyEnd(pcis->vec1_N);
8527:     VecSet(pcis->vec1_global, 0.0);
8528:     VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8529:     VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8530:     VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD);
8531:     VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD);
8532:     VecGetArray(pcis->vec1_N, &array);
8533:     VecGetArray(pcis->vec2_N, &array2);
8534:     for (i = 0; i < pcis->n; i++) {
8535:       if (array[i] != 0.0 && array[i] != array2[i]) {
8536:         PetscInt owned = (PetscInt)PetscRealPart(array[i]), gi;
8537:         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8538:         set_error      = PETSC_TRUE;
8539:         ISLocalToGlobalMappingApply(pcis->mapping, 1, &i, &gi);
8540:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n", PetscGlobalRank, i, gi, owned, neigh);
8541:       }
8542:     }
8543:     VecRestoreArray(pcis->vec2_N, &array2);
8544:     MPIU_Allreduce(&set_error, &set_error_reduced, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
8545:     PetscViewerFlush(pcbddc->dbg_viewer);
8546:     for (i = 0; i < pcis->n; i++) {
8547:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0 / PetscRealPart(array[i]);
8548:     }
8549:     VecRestoreArray(pcis->vec1_N, &array);
8550:     VecSet(pcis->vec1_global, 0.0);
8551:     VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8552:     VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
8553:     VecSum(pcis->vec1_global, &coarsesum);
8554:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT " (%lf)\n", coarse_size, (double)PetscRealPart(coarsesum));
8555:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8556:       PetscInt *gidxs;

8558:       PetscMalloc1(pcbddc->local_primal_size, &gidxs);
8559:       ISLocalToGlobalMappingApply(pcis->mapping, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, gidxs);
8560:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Distribution of local primal indices\n");
8561:       PetscViewerFlush(pcbddc->dbg_viewer);
8562:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d\n", PetscGlobalRank);
8563:       for (i = 0; i < pcbddc->local_primal_size; i++) {
8564:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, local_primal_indices[i], pcbddc->primal_indices_local_idxs[i], gidxs[i]);
8565:       }
8566:       PetscViewerFlush(pcbddc->dbg_viewer);
8567:       PetscFree(gidxs);
8568:     }
8569:     PetscViewerFlush(pcbddc->dbg_viewer);
8570:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8572:   }

8574:   /* get back data */
8575:   *coarse_size_n          = coarse_size;
8576:   *local_primal_indices_n = local_primal_indices;
8577:   return 0;
8578: }

8580: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
8581: {
8582:   IS           localis_t;
8583:   PetscInt     i, lsize, *idxs, n;
8584:   PetscScalar *vals;

8586:   /* get indices in local ordering exploiting local to global map */
8587:   ISGetLocalSize(globalis, &lsize);
8588:   PetscMalloc1(lsize, &vals);
8589:   for (i = 0; i < lsize; i++) vals[i] = 1.0;
8590:   ISGetIndices(globalis, (const PetscInt **)&idxs);
8591:   VecSet(gwork, 0.0);
8592:   VecSet(lwork, 0.0);
8593:   if (idxs) { /* multilevel guard */
8594:     VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE);
8595:     VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES);
8596:   }
8597:   VecAssemblyBegin(gwork);
8598:   ISRestoreIndices(globalis, (const PetscInt **)&idxs);
8599:   PetscFree(vals);
8600:   VecAssemblyEnd(gwork);
8601:   /* now compute set in local ordering */
8602:   VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD);
8603:   VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD);
8604:   VecGetArrayRead(lwork, (const PetscScalar **)&vals);
8605:   VecGetSize(lwork, &n);
8606:   for (i = 0, lsize = 0; i < n; i++) {
8607:     if (PetscRealPart(vals[i]) > 0.5) lsize++;
8608:   }
8609:   PetscMalloc1(lsize, &idxs);
8610:   for (i = 0, lsize = 0; i < n; i++) {
8611:     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
8612:   }
8613:   VecRestoreArrayRead(lwork, (const PetscScalar **)&vals);
8614:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t);
8615:   *localis = localis_t;
8616:   return 0;
8617: }

8619: PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
8620: {
8621:   PC_IS   *pcis   = (PC_IS *)pc->data;
8622:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8623:   PC_IS   *pcisf;
8624:   PC_BDDC *pcbddcf;
8625:   PC       pcf;

8627:   PCCreate(PetscObjectComm((PetscObject)pc), &pcf);
8628:   PCSetOperators(pcf, pc->mat, pc->pmat);
8629:   PCSetType(pcf, PCBDDC);

8631:   pcisf   = (PC_IS *)pcf->data;
8632:   pcbddcf = (PC_BDDC *)pcf->data;

8634:   pcisf->is_B_local = pcis->is_B_local;
8635:   pcisf->vec1_N     = pcis->vec1_N;
8636:   pcisf->BtoNmap    = pcis->BtoNmap;
8637:   pcisf->n          = pcis->n;
8638:   pcisf->n_B        = pcis->n_B;

8640:   PetscFree(pcbddcf->mat_graph);
8641:   PetscFree(pcbddcf->sub_schurs);
8642:   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
8643:   pcbddcf->sub_schurs            = schurs;
8644:   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
8645:   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
8646:   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
8647:   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
8648:   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
8649:   pcbddcf->use_faces             = PETSC_TRUE;
8650:   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
8651:   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
8652:   pcbddcf->use_qr_single         = (PetscBool)!constraints;
8653:   pcbddcf->fake_change           = PETSC_TRUE;
8654:   pcbddcf->dbg_flag              = pcbddc->dbg_flag;

8656:   PCBDDCAdaptiveSelection(pcf);
8657:   PCBDDCConstraintsSetUp(pcf);

8659:   *change = pcbddcf->ConstraintMatrix;
8660:   if (change_primal) ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal);
8661:   if (change_primal_mult) ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult);
8662:   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;

8664:   if (schurs) pcbddcf->sub_schurs = NULL;
8665:   pcbddcf->ConstraintMatrix = NULL;
8666:   pcbddcf->mat_graph        = NULL;
8667:   pcisf->is_B_local         = NULL;
8668:   pcisf->vec1_N             = NULL;
8669:   pcisf->BtoNmap            = NULL;
8670:   PCDestroy(&pcf);
8671:   return 0;
8672: }

8674: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8675: {
8676:   PC_IS          *pcis       = (PC_IS *)pc->data;
8677:   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
8678:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
8679:   Mat             S_j;
8680:   PetscInt       *used_xadj, *used_adjncy;
8681:   PetscBool       free_used_adj;

8683:   PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0);
8684:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8685:   free_used_adj = PETSC_FALSE;
8686:   if (pcbddc->sub_schurs_layers == -1) {
8687:     used_xadj   = NULL;
8688:     used_adjncy = NULL;
8689:   } else {
8690:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8691:       used_xadj   = pcbddc->mat_graph->xadj;
8692:       used_adjncy = pcbddc->mat_graph->adjncy;
8693:     } else if (pcbddc->computed_rowadj) {
8694:       used_xadj   = pcbddc->mat_graph->xadj;
8695:       used_adjncy = pcbddc->mat_graph->adjncy;
8696:     } else {
8697:       PetscBool       flg_row = PETSC_FALSE;
8698:       const PetscInt *xadj, *adjncy;
8699:       PetscInt        nvtxs;

8701:       MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row);
8702:       if (flg_row) {
8703:         PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy);
8704:         PetscArraycpy(used_xadj, xadj, nvtxs + 1);
8705:         PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]);
8706:         free_used_adj = PETSC_TRUE;
8707:       } else {
8708:         pcbddc->sub_schurs_layers = -1;
8709:         used_xadj                 = NULL;
8710:         used_adjncy               = NULL;
8711:       }
8712:       MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row);
8713:     }
8714:   }

8716:   /* setup sub_schurs data */
8717:   MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j);
8718:   if (!sub_schurs->schur_explicit) {
8719:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8720:     MatSchurComplementSetKSP(S_j, pcbddc->ksp_D);
8721:     PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL);
8722:   } else {
8723:     Mat       change        = NULL;
8724:     Vec       scaling       = NULL;
8725:     IS        change_primal = NULL, iP;
8726:     PetscInt  benign_n;
8727:     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
8728:     PetscBool need_change       = PETSC_FALSE;
8729:     PetscBool discrete_harmonic = PETSC_FALSE;

8731:     if (!pcbddc->use_vertices && reuse_solvers) {
8732:       PetscInt n_vertices;

8734:       ISGetLocalSize(sub_schurs->is_vertices, &n_vertices);
8735:       reuse_solvers = (PetscBool)!n_vertices;
8736:     }
8737:     if (!pcbddc->benign_change_explicit) {
8738:       benign_n = pcbddc->benign_n;
8739:     } else {
8740:       benign_n = 0;
8741:     }
8742:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8743:        We need a global reduction to avoid possible deadlocks.
8744:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8745:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8746:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8747:       MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
8748:       need_change = (PetscBool)(!need_change);
8749:     }
8750:     /* If the user defines additional constraints, we import them here */
8751:     if (need_change) {
8753:       PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr);
8754:     }
8755:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

8757:     PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP);
8758:     if (iP) {
8759:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
8760:       PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL);
8761:       PetscOptionsEnd();
8762:     }
8763:     if (discrete_harmonic) {
8764:       Mat A;
8765:       MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A);
8766:       MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL);
8767:       PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP);
8768:       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
8769:                                      pcbddc->benign_zerodiag_subs, change, change_primal));
8770:       MatDestroy(&A);
8771:     } else {
8772:       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
8773:                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
8774:     }
8775:     MatDestroy(&change);
8776:     ISDestroy(&change_primal);
8777:   }
8778:   MatDestroy(&S_j);

8780:   /* free adjacency */
8781:   if (free_used_adj) PetscFree2(used_xadj, used_adjncy);
8782:   PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0);
8783:   return 0;
8784: }

8786: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8787: {
8788:   PC_IS      *pcis   = (PC_IS *)pc->data;
8789:   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
8790:   PCBDDCGraph graph;

8792:   /* attach interface graph for determining subsets */
8793:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8794:     IS       verticesIS, verticescomm;
8795:     PetscInt vsize, *idxs;

8797:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS);
8798:     ISGetSize(verticesIS, &vsize);
8799:     ISGetIndices(verticesIS, (const PetscInt **)&idxs);
8800:     ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm);
8801:     ISRestoreIndices(verticesIS, (const PetscInt **)&idxs);
8802:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS);
8803:     PCBDDCGraphCreate(&graph);
8804:     PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount);
8805:     PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm);
8806:     ISDestroy(&verticescomm);
8807:     PCBDDCGraphComputeConnectedComponents(graph);
8808:   } else {
8809:     graph = pcbddc->mat_graph;
8810:   }
8811:   /* print some info */
8812:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8813:     IS       vertices;
8814:     PetscInt nv, nedges, nfaces;
8815:     PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer);
8816:     PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices);
8817:     ISGetSize(vertices, &nv);
8818:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8819:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n");
8820:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices);
8821:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges);
8822:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces);
8823:     PetscViewerFlush(pcbddc->dbg_viewer);
8824:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8825:     PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices);
8826:   }

8828:   /* sub_schurs init */
8829:   if (!pcbddc->sub_schurs) PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
8830:   PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE);

8832:   /* free graph struct */
8833:   if (pcbddc->sub_schurs_rebuild) PCBDDCGraphDestroy(&graph);
8834:   return 0;
8835: }

8837: PetscErrorCode PCBDDCCheckOperator(PC pc)
8838: {
8839:   PC_IS   *pcis   = (PC_IS *)pc->data;
8840:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

8842:   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8843:     IS           zerodiag = NULL;
8844:     Mat          S_j, B0_B = NULL;
8845:     Vec          dummy_vec = NULL, vec_check_B, vec_scale_P;
8846:     PetscScalar *p0_check, *array, *array2;
8847:     PetscReal    norm;
8848:     PetscInt     i;

8850:     /* B0 and B0_B */
8851:     if (zerodiag) {
8852:       IS dummy;

8854:       ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &dummy);
8855:       MatCreateSubMatrix(pcbddc->benign_B0, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B);
8856:       MatCreateVecs(B0_B, NULL, &dummy_vec);
8857:       ISDestroy(&dummy);
8858:     }
8859:     /* I need a primal vector to scale primal nodes since BDDC sums contributions */
8860:     VecDuplicate(pcbddc->vec1_P, &vec_scale_P);
8861:     VecSet(pcbddc->vec1_P, 1.0);
8862:     VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD);
8863:     VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD);
8864:     VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE);
8865:     VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE);
8866:     VecReciprocal(vec_scale_P);
8867:     /* S_j */
8868:     MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j);
8869:     MatSchurComplementSetKSP(S_j, pcbddc->ksp_D);

8871:     /* mimic vector in \widetilde{W}_\Gamma */
8872:     VecSetRandom(pcis->vec1_N, NULL);
8873:     /* continuous in primal space */
8874:     VecSetRandom(pcbddc->coarse_vec, NULL);
8875:     VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE);
8876:     VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE);
8877:     VecGetArray(pcbddc->vec1_P, &array);
8878:     PetscCalloc1(pcbddc->benign_n, &p0_check);
8879:     for (i = 0; i < pcbddc->benign_n; i++) p0_check[i] = array[pcbddc->local_primal_size - pcbddc->benign_n + i];
8880:     VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES);
8881:     VecRestoreArray(pcbddc->vec1_P, &array);
8882:     VecAssemblyBegin(pcis->vec1_N);
8883:     VecAssemblyEnd(pcis->vec1_N);
8884:     VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD);
8885:     VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD);
8886:     VecDuplicate(pcis->vec2_B, &vec_check_B);
8887:     VecCopy(pcis->vec2_B, vec_check_B);

8889:     /* assemble rhs for coarse problem */
8890:     /* \widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8891:     /* local with Schur */
8892:     MatMult(S_j, pcis->vec2_B, pcis->vec1_B);
8893:     if (zerodiag) {
8894:       VecGetArray(dummy_vec, &array);
8895:       for (i = 0; i < pcbddc->benign_n; i++) array[i] = p0_check[i];
8896:       VecRestoreArray(dummy_vec, &array);
8897:       MatMultTransposeAdd(B0_B, dummy_vec, pcis->vec1_B, pcis->vec1_B);
8898:     }
8899:     /* sum on primal nodes the local contributions */
8900:     VecScatterBegin(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE);
8901:     VecScatterEnd(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE);
8902:     VecGetArray(pcis->vec1_N, &array);
8903:     VecGetArray(pcbddc->vec1_P, &array2);
8904:     for (i = 0; i < pcbddc->local_primal_size; i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8905:     VecRestoreArray(pcbddc->vec1_P, &array2);
8906:     VecRestoreArray(pcis->vec1_N, &array);
8907:     VecSet(pcbddc->coarse_vec, 0.);
8908:     VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD);
8909:     VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD);
8910:     VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE);
8911:     VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE);
8912:     VecGetArray(pcbddc->vec1_P, &array);
8913:     /* scale primal nodes (BDDC sums contributions) */
8914:     VecPointwiseMult(pcbddc->vec1_P, vec_scale_P, pcbddc->vec1_P);
8915:     VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES);
8916:     VecRestoreArray(pcbddc->vec1_P, &array);
8917:     VecAssemblyBegin(pcis->vec1_N);
8918:     VecAssemblyEnd(pcis->vec1_N);
8919:     VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
8920:     VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
8921:     /* global: \widetilde{B0}_B w_\Gamma */
8922:     if (zerodiag) {
8923:       MatMult(B0_B, pcis->vec2_B, dummy_vec);
8924:       VecGetArray(dummy_vec, &array);
8925:       for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = array[i];
8926:       VecRestoreArray(dummy_vec, &array);
8927:     }
8928:     /* BDDC */
8929:     VecSet(pcis->vec1_D, 0.);
8930:     PCBDDCApplyInterfacePreconditioner(pc, PETSC_FALSE);

8932:     VecCopy(pcis->vec1_B, pcis->vec2_B);
8933:     VecAXPY(pcis->vec1_B, -1.0, vec_check_B);
8934:     VecNorm(pcis->vec1_B, NORM_INFINITY, &norm);
8935:     PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC local error is %1.4e\n", PetscGlobalRank, (double)norm);
8936:     for (i = 0; i < pcbddc->benign_n; i++) PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n", PetscGlobalRank, i, (double)PetscAbsScalar(pcbddc->benign_p0[i] - p0_check[i]));
8937:     PetscFree(p0_check);
8938:     VecDestroy(&vec_scale_P);
8939:     VecDestroy(&vec_check_B);
8940:     VecDestroy(&dummy_vec);
8941:     MatDestroy(&S_j);
8942:     MatDestroy(&B0_B);
8943:   }
8944:   return 0;
8945: }

8947: #include <../src/mat/impls/aij/mpi/mpiaij.h>
8948: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8949: {
8950:   Mat         At;
8951:   IS          rows;
8952:   PetscInt    rst, ren;
8953:   PetscLayout rmap;

8955:   rst = ren = 0;
8956:   if (ccomm != MPI_COMM_NULL) {
8957:     PetscLayoutCreate(ccomm, &rmap);
8958:     PetscLayoutSetSize(rmap, A->rmap->N);
8959:     PetscLayoutSetBlockSize(rmap, 1);
8960:     PetscLayoutSetUp(rmap);
8961:     PetscLayoutGetRange(rmap, &rst, &ren);
8962:   }
8963:   ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows);
8964:   MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At);
8965:   ISDestroy(&rows);

8967:   if (ccomm != MPI_COMM_NULL) {
8968:     Mat_MPIAIJ *a, *b;
8969:     IS          from, to;
8970:     Vec         gvec;
8971:     PetscInt    lsize;

8973:     MatCreate(ccomm, B);
8974:     MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N);
8975:     MatSetType(*B, MATAIJ);
8976:     PetscLayoutDestroy(&((*B)->rmap));
8977:     PetscLayoutSetUp((*B)->cmap);
8978:     a = (Mat_MPIAIJ *)At->data;
8979:     b = (Mat_MPIAIJ *)(*B)->data;
8980:     MPI_Comm_size(ccomm, &b->size);
8981:     MPI_Comm_rank(ccomm, &b->rank);
8982:     PetscObjectReference((PetscObject)a->A);
8983:     PetscObjectReference((PetscObject)a->B);
8984:     b->A = a->A;
8985:     b->B = a->B;

8987:     b->donotstash   = a->donotstash;
8988:     b->roworiented  = a->roworiented;
8989:     b->rowindices   = NULL;
8990:     b->rowvalues    = NULL;
8991:     b->getrowactive = PETSC_FALSE;

8993:     (*B)->rmap         = rmap;
8994:     (*B)->factortype   = A->factortype;
8995:     (*B)->assembled    = PETSC_TRUE;
8996:     (*B)->insertmode   = NOT_SET_VALUES;
8997:     (*B)->preallocated = PETSC_TRUE;

8999:     if (a->colmap) {
9000: #if defined(PETSC_USE_CTABLE)
9001:       PetscTableCreateCopy(a->colmap, &b->colmap);
9002: #else
9003:       PetscMalloc1(At->cmap->N, &b->colmap);
9004:       PetscArraycpy(b->colmap, a->colmap, At->cmap->N);
9005: #endif
9006:     } else b->colmap = NULL;
9007:     if (a->garray) {
9008:       PetscInt len;
9009:       len = a->B->cmap->n;
9010:       PetscMalloc1(len + 1, &b->garray);
9011:       if (len) PetscArraycpy(b->garray, a->garray, len);
9012:     } else b->garray = NULL;

9014:     PetscObjectReference((PetscObject)a->lvec);
9015:     b->lvec = a->lvec;

9017:     /* cannot use VecScatterCopy */
9018:     VecGetLocalSize(b->lvec, &lsize);
9019:     ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from);
9020:     ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to);
9021:     MatCreateVecs(*B, &gvec, NULL);
9022:     VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx);
9023:     ISDestroy(&from);
9024:     ISDestroy(&to);
9025:     VecDestroy(&gvec);
9026:   }
9027:   MatDestroy(&At);
9028:   return 0;
9029: }