Actual source code: gasm.c

  1: /*
  2:   This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
  3:   In this version each MPI rank may intersect multiple subdomains and any subdomain may
  4:   intersect multiple MPI ranks.  Intersections of subdomains with MPI ranks are called *local
  5:   subdomains*.

  7:        N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly via PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
  8:        n    - actual number of local subdomains on this rank (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
  9:        nmax - maximum number of local subdomains per rank    (calculated in PCSetUp_GASM())
 10: */
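
/*
   Illustration of the quantities above under an assumed example scenario: with 4 MPI ranks and
   two global subdomains, each spanning 2 ranks, N = 2; every rank intersects exactly one
   subdomain, so n = 1 on each rank and nmax = 1.
*/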
 11: #include <petsc/private/pcimpl.h>
 12: #include <petscdm.h>

 14: typedef struct {
 15:   PetscInt   N, n, nmax;
 16:   PetscInt   overlap;                /* overlap requested by user */
 17:   PCGASMType type;                   /* use reduced interpolation, restriction or both */
 18:   PetscBool  type_set;               /* if user set this value (so won't change it for symmetric problems) */
 19:   PetscBool  same_subdomain_solvers; /* flag indicating whether all local solvers are same */
 20:   PetscBool  sort_indices;           /* flag to sort subdomain indices */
 21:   PetscBool  user_subdomains;        /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
 22:   PetscBool  dm_subdomains;          /* whether DM is allowed to define subdomains */
 23:   PetscBool  hierarchicalpartitioning;
 24:   IS        *ois;           /* index sets that define the outer (conceptually, overlapping) subdomains */
 25:   IS        *iis;           /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
 26:   KSP       *ksp;           /* linear solvers for each subdomain */
 27:   Mat       *pmat;          /* subdomain block matrices */
 28:   Vec        gx, gy;        /* Merged work vectors */
  29:   Vec       *x, *y;         /* Split work vectors; these alias pieces of the storage of the merged vectors above. */
 30:   VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */
 31:   VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */
 32:   VecScatter pctoouter;
 33:   IS         permutationIS;
 34:   Mat        permutationP;
 35:   Mat        pcmat;
 36:   Vec        pcx, pcy;
 37: } PC_GASM;

 39: static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc, PetscInt **numbering, PetscInt **permutation)
 40: {
 41:   PC_GASM *osm = (PC_GASM *)pc->data;
 42:   PetscInt i;

 44:   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
 45:   PetscMalloc2(osm->n, numbering, osm->n, permutation);
 46:   PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc), osm->n, (PetscObject *)osm->iis, NULL, *numbering);
 47:   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
 48:   PetscSortIntWithPermutation(osm->n, *numbering, *permutation);
 49:   return 0;
 50: }

 52: static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
 53: {
 54:   PC_GASM        *osm = (PC_GASM *)pc->data;
 55:   PetscInt        j, nidx;
 56:   const PetscInt *idx;
 57:   PetscViewer     sviewer;
 58:   char           *cidx;

 61:   /* Inner subdomains. */
 62:   ISGetLocalSize(osm->iis[i], &nidx);
 63:   /*
 64:    No more than 15 characters per index plus a space.
 65:    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
 66:    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
  67:    For nidx == 0, the whole string consists of '\0' characters.
 68:    */
 69: #define len 16 * (nidx + 1) + 1
 70:   PetscMalloc1(len, &cidx);
 71:   PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
 72: #undef len
 73:   ISGetIndices(osm->iis[i], &idx);
 74:   for (j = 0; j < nidx; ++j) PetscViewerStringSPrintf(sviewer, "%" PetscInt_FMT " ", idx[j]);
 75:   ISRestoreIndices(osm->iis[i], &idx);
 76:   PetscViewerDestroy(&sviewer);
 77:   PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");
 78:   PetscViewerFlush(viewer);
 79:   PetscViewerASCIIPushSynchronized(viewer);
 80:   PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
 81:   PetscViewerFlush(viewer);
 82:   PetscViewerASCIIPopSynchronized(viewer);
 83:   PetscViewerASCIIPrintf(viewer, "\n");
 84:   PetscViewerFlush(viewer);
 85:   PetscFree(cidx);
 86:   /* Outer subdomains. */
 87:   ISGetLocalSize(osm->ois[i], &nidx);
 88:   /*
 89:    No more than 15 characters per index plus a space.
 90:    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
 91:    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
  92:    For nidx == 0, the whole string consists of '\0' characters.
 93:    */
 94: #define len 16 * (nidx + 1) + 1
 95:   PetscMalloc1(len, &cidx);
 96:   PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
 97: #undef len
 98:   ISGetIndices(osm->ois[i], &idx);
 99:   for (j = 0; j < nidx; ++j) PetscViewerStringSPrintf(sviewer, "%" PetscInt_FMT " ", idx[j]);
100:   PetscViewerDestroy(&sviewer);
101:   ISRestoreIndices(osm->ois[i], &idx);
102:   PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");
103:   PetscViewerFlush(viewer);
104:   PetscViewerASCIIPushSynchronized(viewer);
105:   PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
106:   PetscViewerFlush(viewer);
107:   PetscViewerASCIIPopSynchronized(viewer);
108:   PetscViewerASCIIPrintf(viewer, "\n");
109:   PetscViewerFlush(viewer);
110:   PetscFree(cidx);
111:   return 0;
112: }

114: static PetscErrorCode PCGASMPrintSubdomains(PC pc)
115: {
116:   PC_GASM    *osm = (PC_GASM *)pc->data;
117:   const char *prefix;
118:   char        fname[PETSC_MAX_PATH_LEN + 1];
119:   PetscInt    l, d, count;
120:   PetscBool   found;
121:   PetscViewer viewer, sviewer = NULL;
122:   PetscInt   *numbering, *permutation; /* global numbering of locally-supported subdomains and the permutation from the local ordering */

124:   PCGetOptionsPrefix(pc, &prefix);
125:   PetscOptionsHasName(NULL, prefix, "-pc_gasm_print_subdomains", &found);
126:   if (!found) return 0;
127:   PetscOptionsGetString(NULL, prefix, "-pc_gasm_print_subdomains", fname, sizeof(fname), &found);
128:   if (!found) PetscStrcpy(fname, "stdout");
129:   PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc), fname, &viewer);
130:   /*
131:    Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
132:    the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
133:   */
134:   PetscObjectName((PetscObject)viewer);
135:   l = 0;
136:   PCGASMComputeGlobalSubdomainNumbering_Private(pc, &numbering, &permutation);
137:   for (count = 0; count < osm->N; ++count) {
138:     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
139:     if (l < osm->n) {
140:       d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
141:       if (numbering[d] == count) {
142:         PetscViewerGetSubViewer(viewer, ((PetscObject)osm->ois[d])->comm, &sviewer);
143:         PCGASMSubdomainView_Private(pc, d, sviewer);
144:         PetscViewerRestoreSubViewer(viewer, ((PetscObject)osm->ois[d])->comm, &sviewer);
145:         ++l;
146:       }
147:     }
148:     MPI_Barrier(PetscObjectComm((PetscObject)pc));
149:   }
150:   PetscFree2(numbering, permutation);
151:   PetscViewerDestroy(&viewer);
152:   return 0;
153: }

155: static PetscErrorCode PCView_GASM(PC pc, PetscViewer viewer)
156: {
157:   PC_GASM    *osm = (PC_GASM *)pc->data;
158:   const char *prefix;
159:   PetscMPIInt rank, size;
160:   PetscInt    bsz;
161:   PetscBool   iascii, view_subdomains = PETSC_FALSE;
162:   PetscViewer sviewer;
163:   PetscInt    count, l;
164:   char        overlap[256]     = "user-defined overlap";
165:   char        gsubdomains[256] = "unknown total number of subdomains";
166:   char        msubdomains[256] = "unknown max number of local subdomains";
167:   PetscInt   *numbering, *permutation; /* global numbering of locally-supported subdomains and the permutation from the local ordering */

169:   MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
170:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);

172:   if (osm->overlap >= 0) PetscSNPrintf(overlap, sizeof(overlap), "requested amount of overlap = %" PetscInt_FMT, osm->overlap);
173:   if (osm->N != PETSC_DETERMINE) PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %" PetscInt_FMT, osm->N);
174:   if (osm->nmax != PETSC_DETERMINE) PetscSNPrintf(msubdomains, sizeof(msubdomains), "max number of local subdomains = %" PetscInt_FMT, osm->nmax);

176:   PCGetOptionsPrefix(pc, &prefix);
177:   PetscOptionsGetBool(NULL, prefix, "-pc_gasm_view_subdomains", &view_subdomains, NULL);

179:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
180:   if (iascii) {
181:     /*
182:      Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
183:      the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
184:      collectively on the comm.
185:      */
186:     PetscObjectName((PetscObject)viewer);
187:     PetscViewerASCIIPrintf(viewer, "  Restriction/interpolation type: %s\n", PCGASMTypes[osm->type]);
188:     PetscViewerASCIIPrintf(viewer, "  %s\n", overlap);
189:     PetscViewerASCIIPrintf(viewer, "  %s\n", gsubdomains);
190:     PetscViewerASCIIPrintf(viewer, "  %s\n", msubdomains);
191:     PetscViewerASCIIPushSynchronized(viewer);
192:     PetscViewerASCIISynchronizedPrintf(viewer, "  [%d|%d] number of locally-supported subdomains = %" PetscInt_FMT "\n", rank, size, osm->n);
193:     PetscViewerFlush(viewer);
194:     PetscViewerASCIIPopSynchronized(viewer);
195:     /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
196:     PetscViewerASCIIPrintf(viewer, "  Subdomain solver info is as follows:\n");
197:     PetscViewerASCIIPushTab(viewer);
198:     PetscViewerASCIIPrintf(viewer, "  - - - - - - - - - - - - - - - - - -\n");
199:     /* Make sure that everybody waits for the banner to be printed. */
200:     MPI_Barrier(PetscObjectComm((PetscObject)viewer));
201:     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
202:     PCGASMComputeGlobalSubdomainNumbering_Private(pc, &numbering, &permutation);
203:     l = 0;
204:     for (count = 0; count < osm->N; ++count) {
205:       PetscMPIInt srank, ssize;
206:       if (l < osm->n) {
207:         PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
208:         if (numbering[d] == count) {
209:           MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);
210:           MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);
211:           PetscViewerGetSubViewer(viewer, ((PetscObject)osm->ois[d])->comm, &sviewer);
212:           ISGetLocalSize(osm->ois[d], &bsz);
213:           PetscViewerASCIISynchronizedPrintf(sviewer, "  [%d|%d] (subcomm [%d|%d]) local subdomain number %" PetscInt_FMT ", local size = %" PetscInt_FMT "\n", rank, size, srank, ssize, d, bsz);
214:           PetscViewerFlush(sviewer);
215:           if (view_subdomains) PCGASMSubdomainView_Private(pc, d, sviewer);
216:           if (!pc->setupcalled) {
217:             PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");
218:           } else {
219:             KSPView(osm->ksp[d], sviewer);
220:           }
221:           PetscViewerASCIIPrintf(sviewer, "  - - - - - - - - - - - - - - - - - -\n");
222:           PetscViewerFlush(sviewer);
223:           PetscViewerRestoreSubViewer(viewer, ((PetscObject)osm->ois[d])->comm, &sviewer);
224:           ++l;
225:         }
226:       }
227:       MPI_Barrier(PetscObjectComm((PetscObject)pc));
228:     }
229:     PetscFree2(numbering, permutation);
230:     PetscViewerASCIIPopTab(viewer);
231:     PetscViewerFlush(viewer);
232:     /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
233:     PetscViewerASCIIPopSynchronized(viewer);
234:   }
235:   return 0;
236: }

238: PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);

240: PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
241: {
242:   PC_GASM        *osm = (PC_GASM *)pc->data;
243:   MatPartitioning part;
244:   MPI_Comm        comm;
245:   PetscMPIInt     size;
246:   PetscInt        nlocalsubdomains, fromrows_localsize;
247:   IS              partitioning, fromrows, isn;
248:   Vec             outervec;

250:   PetscObjectGetComm((PetscObject)pc, &comm);
251:   MPI_Comm_size(comm, &size);
 252:   /* We do not need hierarchical partitioning when the total number of subdomains
 253:    * is consistent with the number of MPI ranks; in the following cases we skip it.
 254:    */
257:   if (osm->N == PETSC_DETERMINE || osm->N >= size || osm->N == 1) return 0;
259:   nlocalsubdomains = size / osm->N;
260:   osm->n           = 1;
261:   MatPartitioningCreate(comm, &part);
262:   MatPartitioningSetAdjacency(part, pc->pmat);
263:   MatPartitioningSetType(part, MATPARTITIONINGHIERARCH);
264:   MatPartitioningHierarchicalSetNcoarseparts(part, osm->N);
265:   MatPartitioningHierarchicalSetNfineparts(part, nlocalsubdomains);
266:   MatPartitioningSetFromOptions(part);
 267:   /* get the new owner rank of each vertex */
268:   MatPartitioningApply(part, &partitioning);
269:   ISBuildTwoSided(partitioning, NULL, &fromrows);
270:   ISPartitioningToNumbering(partitioning, &isn);
271:   ISDestroy(&isn);
272:   ISGetLocalSize(fromrows, &fromrows_localsize);
273:   MatPartitioningDestroy(&part);
274:   MatCreateVecs(pc->pmat, &outervec, NULL);
275:   VecCreateMPI(comm, fromrows_localsize, PETSC_DETERMINE, &(osm->pcx));
276:   VecDuplicate(osm->pcx, &(osm->pcy));
277:   VecScatterCreate(osm->pcx, NULL, outervec, fromrows, &(osm->pctoouter));
278:   MatCreateSubMatrix(pc->pmat, fromrows, fromrows, MAT_INITIAL_MATRIX, &(osm->permutationP));
279:   PetscObjectReference((PetscObject)fromrows);
280:   osm->permutationIS = fromrows;
281:   osm->pcmat         = pc->pmat;
282:   PetscObjectReference((PetscObject)osm->permutationP);
283:   pc->pmat = osm->permutationP;
284:   VecDestroy(&outervec);
285:   ISDestroy(&fromrows);
286:   ISDestroy(&partitioning);
287:   osm->n = PETSC_DETERMINE;
288:   return 0;
289: }

291: static PetscErrorCode PCSetUp_GASM(PC pc)
292: {
293:   PC_GASM        *osm = (PC_GASM *)pc->data;
294:   PetscInt        i, nInnerIndices, nTotalInnerIndices;
295:   PetscMPIInt     rank, size;
296:   MatReuse        scall = MAT_REUSE_MATRIX;
297:   KSP             ksp;
298:   PC              subpc;
299:   const char     *prefix, *pprefix;
300:   Vec             x, y;
301:   PetscInt        oni;   /* Number of indices in the i-th local outer subdomain.               */
 302:   const PetscInt *oidxi; /* Indices of the i-th local outer subdomain.                          */
303:   PetscInt        on;    /* Number of indices in the disjoint union of local outer subdomains. */
304:   PetscInt       *oidx;  /* Indices in the disjoint union of local outer subdomains. */
 305:   IS              gois;  /* Disjoint union of the global indices of the outer subdomains.      */
306:   IS              goid;  /* Identity IS of the size of the disjoint union of outer subdomains. */
307:   PetscScalar    *gxarray, *gyarray;
308:   PetscInt        gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
309:   PetscInt        num_subdomains  = 0;
310:   DM             *subdomain_dm    = NULL;
311:   char          **subdomain_names = NULL;
312:   PetscInt       *numbering;

314:   MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
315:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
316:   if (!pc->setupcalled) {
317:     /* use a hierarchical partitioning */
318:     if (osm->hierarchicalpartitioning) PCGASMSetHierarchicalPartitioning(pc);
319:     if (osm->n == PETSC_DETERMINE) {
320:       if (osm->N != PETSC_DETERMINE) {
321:         /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
322:         PCGASMCreateSubdomains(pc->pmat, osm->N, &osm->n, &osm->iis);
323:       } else if (osm->dm_subdomains && pc->dm) {
324:         /* try pc->dm next, if allowed */
325:         PetscInt d;
326:         IS      *inner_subdomain_is, *outer_subdomain_is;
327:         DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);
328:         if (num_subdomains) PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);
329:         for (d = 0; d < num_subdomains; ++d) {
330:           if (inner_subdomain_is) ISDestroy(&inner_subdomain_is[d]);
331:           if (outer_subdomain_is) ISDestroy(&outer_subdomain_is[d]);
332:         }
333:         PetscFree(inner_subdomain_is);
334:         PetscFree(outer_subdomain_is);
335:       } else {
336:         /* still no subdomains; use one per rank */
337:         osm->nmax = osm->n = 1;
338:         MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
339:         osm->N = size;
340:         PCGASMCreateLocalSubdomains(pc->pmat, osm->n, &osm->iis);
341:       }
342:     }
343:     if (!osm->iis) {
344:       /*
345:        osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
346:        We create the requisite number of local inner subdomains and then expand them into
 347:        outer subdomains, if necessary.
348:        */
349:       PCGASMCreateLocalSubdomains(pc->pmat, osm->n, &osm->iis);
350:     }
351:     if (!osm->ois) {
352:       /*
353:             Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
354:             has been requested, copy the inner subdomains over so they can be modified.
355:       */
356:       PetscMalloc1(osm->n, &osm->ois);
357:       for (i = 0; i < osm->n; ++i) {
358:         if (osm->overlap > 0 && osm->N > 1) { /* With positive overlap, osm->iis[i] will be modified */
359:           ISDuplicate(osm->iis[i], (osm->ois) + i);
360:           ISCopy(osm->iis[i], osm->ois[i]);
361:         } else {
362:           PetscObjectReference((PetscObject)((osm->iis)[i]));
363:           osm->ois[i] = osm->iis[i];
364:         }
365:       }
366:       if (osm->overlap > 0 && osm->N > 1) {
367:         /* Extend the "overlapping" regions by a number of steps */
368:         MatIncreaseOverlapSplit(pc->pmat, osm->n, osm->ois, osm->overlap);
369:       }
370:     }

372:     /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
373:     if (osm->nmax == PETSC_DETERMINE) {
374:       PetscMPIInt inwork, outwork;
 375:       /* determine the maximum number of local subdomains over all ranks */
376:       inwork = osm->n;
377:       MPIU_Allreduce(&inwork, &outwork, 1, MPI_INT, MPI_MAX, PetscObjectComm((PetscObject)pc));
378:       osm->nmax = outwork;
379:     }
380:     if (osm->N == PETSC_DETERMINE) {
381:       /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
382:       PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc), osm->n, (PetscObject *)osm->ois, &osm->N, NULL);
383:     }

385:     if (osm->sort_indices) {
386:       for (i = 0; i < osm->n; i++) {
387:         ISSort(osm->ois[i]);
388:         ISSort(osm->iis[i]);
389:       }
390:     }
391:     PCGetOptionsPrefix(pc, &prefix);
392:     PCGASMPrintSubdomains(pc);

394:     /*
395:        Merge the ISs, create merged vectors and restrictions.
396:      */
397:     /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
398:     on = 0;
399:     for (i = 0; i < osm->n; i++) {
400:       ISGetLocalSize(osm->ois[i], &oni);
401:       on += oni;
402:     }
403:     PetscMalloc1(on, &oidx);
404:     on = 0;
405:     /* Merge local indices together */
406:     for (i = 0; i < osm->n; i++) {
407:       ISGetLocalSize(osm->ois[i], &oni);
408:       ISGetIndices(osm->ois[i], &oidxi);
409:       PetscArraycpy(oidx + on, oidxi, oni);
410:       ISRestoreIndices(osm->ois[i], &oidxi);
411:       on += oni;
412:     }
413:     ISCreateGeneral(((PetscObject)(pc))->comm, on, oidx, PETSC_OWN_POINTER, &gois);
414:     nTotalInnerIndices = 0;
415:     for (i = 0; i < osm->n; i++) {
416:       ISGetLocalSize(osm->iis[i], &nInnerIndices);
417:       nTotalInnerIndices += nInnerIndices;
418:     }
419:     VecCreateMPI(((PetscObject)(pc))->comm, nTotalInnerIndices, PETSC_DETERMINE, &x);
420:     VecDuplicate(x, &y);

422:     VecCreateMPI(PetscObjectComm((PetscObject)pc), on, PETSC_DECIDE, &osm->gx);
423:     VecDuplicate(osm->gx, &osm->gy);
424:     VecGetOwnershipRange(osm->gx, &gostart, NULL);
425:     ISCreateStride(PetscObjectComm((PetscObject)pc), on, gostart, 1, &goid);
 426:     /* gois may contain indices that are not locally owned */
427:     VecScatterCreate(x, gois, osm->gx, goid, &(osm->gorestriction));
428:     PetscMalloc1(osm->n, &numbering);
429:     PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc), osm->n, (PetscObject *)osm->ois, NULL, numbering);
430:     VecDestroy(&x);
431:     ISDestroy(&gois);

433:     /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
434:     {
 435:       PetscInt        ini;   /* Number of indices in the i-th local inner subdomain. */
436:       PetscInt        in;    /* Number of indices in the disjoint union of local inner subdomains. */
437:       PetscInt       *iidx;  /* Global indices in the merged local inner subdomain. */
438:       PetscInt       *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
439:       IS              giis;  /* IS for the disjoint union of inner subdomains. */
440:       IS              giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
441:       PetscScalar    *array;
442:       const PetscInt *indices;
443:       PetscInt        k;
444:       on = 0;
445:       for (i = 0; i < osm->n; i++) {
446:         ISGetLocalSize(osm->ois[i], &oni);
447:         on += oni;
448:       }
449:       PetscMalloc1(on, &iidx);
450:       PetscMalloc1(on, &ioidx);
451:       VecGetArray(y, &array);
 452:       /* tag each inner index with its subdomain's global number so overlap can be detected after the scatter */
453:       in = 0;
454:       for (i = 0; i < osm->n; i++) {
455:         ISGetLocalSize(osm->iis[i], &ini);
456:         for (k = 0; k < ini; ++k) array[in + k] = numbering[i];
457:         in += ini;
458:       }
459:       VecRestoreArray(y, &array);
460:       VecScatterBegin(osm->gorestriction, y, osm->gy, INSERT_VALUES, SCATTER_FORWARD);
461:       VecScatterEnd(osm->gorestriction, y, osm->gy, INSERT_VALUES, SCATTER_FORWARD);
462:       VecGetOwnershipRange(osm->gy, &gostart, NULL);
463:       VecGetArray(osm->gy, &array);
464:       on = 0;
465:       in = 0;
466:       for (i = 0; i < osm->n; i++) {
467:         ISGetLocalSize(osm->ois[i], &oni);
468:         ISGetIndices(osm->ois[i], &indices);
469:         for (k = 0; k < oni; k++) {
470:           /*  skip overlapping indices to get inner domain */
471:           if (PetscRealPart(array[on + k]) != numbering[i]) continue;
472:           iidx[in]    = indices[k];
473:           ioidx[in++] = gostart + on + k;
474:         }
475:         ISRestoreIndices(osm->ois[i], &indices);
476:         on += oni;
477:       }
478:       VecRestoreArray(osm->gy, &array);
479:       ISCreateGeneral(PetscObjectComm((PetscObject)pc), in, iidx, PETSC_OWN_POINTER, &giis);
480:       ISCreateGeneral(PetscObjectComm((PetscObject)pc), in, ioidx, PETSC_OWN_POINTER, &giois);
481:       VecScatterCreate(y, giis, osm->gy, giois, &osm->girestriction);
482:       VecDestroy(&y);
483:       ISDestroy(&giis);
484:       ISDestroy(&giois);
485:     }
486:     ISDestroy(&goid);
487:     PetscFree(numbering);

489:     /* Create the subdomain work vectors. */
490:     PetscMalloc1(osm->n, &osm->x);
491:     PetscMalloc1(osm->n, &osm->y);
492:     VecGetArray(osm->gx, &gxarray);
493:     VecGetArray(osm->gy, &gyarray);
494:     for (i = 0, on = 0; i < osm->n; ++i, on += oni) {
495:       PetscInt oNi;
496:       ISGetLocalSize(osm->ois[i], &oni);
497:       /* on a sub communicator */
498:       ISGetSize(osm->ois[i], &oNi);
499:       VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm, 1, oni, oNi, gxarray + on, &osm->x[i]);
500:       VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm, 1, oni, oNi, gyarray + on, &osm->y[i]);
501:     }
502:     VecRestoreArray(osm->gx, &gxarray);
503:     VecRestoreArray(osm->gy, &gyarray);
504:     /* Create the subdomain solvers */
505:     PetscMalloc1(osm->n, &osm->ksp);
506:     for (i = 0; i < osm->n; i++) {
507:       char subprefix[PETSC_MAX_PATH_LEN + 1];
508:       KSPCreate(((PetscObject)(osm->ois[i]))->comm, &ksp);
509:       KSPSetErrorIfNotConverged(ksp, pc->erroriffailure);
510:       PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1);
511:       KSPSetType(ksp, KSPPREONLY);
512:       KSPGetPC(ksp, &subpc); /* Why do we need this here? */
513:       if (subdomain_dm) {
514:         KSPSetDM(ksp, subdomain_dm[i]);
515:         DMDestroy(subdomain_dm + i);
516:       }
517:       PCGetOptionsPrefix(pc, &prefix);
518:       KSPSetOptionsPrefix(ksp, prefix);
519:       if (subdomain_names && subdomain_names[i]) {
520:         PetscSNPrintf(subprefix, PETSC_MAX_PATH_LEN, "sub_%s_", subdomain_names[i]);
521:         KSPAppendOptionsPrefix(ksp, subprefix);
522:         PetscFree(subdomain_names[i]);
523:       }
524:       KSPAppendOptionsPrefix(ksp, "sub_");
525:       osm->ksp[i] = ksp;
526:     }
527:     PetscFree(subdomain_dm);
528:     PetscFree(subdomain_names);
529:     scall = MAT_INITIAL_MATRIX;
530:   } else { /* if (pc->setupcalled) */
531:     /*
532:        Destroy the submatrices from the previous iteration
533:     */
534:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
535:       MatDestroyMatrices(osm->n, &osm->pmat);
536:       scall = MAT_INITIAL_MATRIX;
537:     }
538:     if (osm->permutationIS) {
539:       MatCreateSubMatrix(pc->pmat, osm->permutationIS, osm->permutationIS, scall, &osm->permutationP);
540:       PetscObjectReference((PetscObject)osm->permutationP);
541:       osm->pcmat = pc->pmat;
542:       pc->pmat   = osm->permutationP;
543:     }
544:   }

546:   /*
547:      Extract the submatrices.
548:   */
549:   if (size > 1) {
550:     MatCreateSubMatricesMPI(pc->pmat, osm->n, osm->ois, osm->ois, scall, &osm->pmat);
551:   } else {
552:     MatCreateSubMatrices(pc->pmat, osm->n, osm->ois, osm->ois, scall, &osm->pmat);
553:   }
554:   if (scall == MAT_INITIAL_MATRIX) {
555:     PetscObjectGetOptionsPrefix((PetscObject)pc->pmat, &pprefix);
556:     for (i = 0; i < osm->n; i++) { PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i], pprefix); }
557:   }

559:   /* Return control to the user so that the submatrices can be modified (e.g., to apply
560:      different boundary conditions for the submatrices than for the global problem) */
561:   PCModifySubMatrices(pc, osm->n, osm->ois, osm->ois, osm->pmat, pc->modifysubmatricesP);

563:   /*
564:      Loop over submatrices putting them into local ksps
565:   */
566:   for (i = 0; i < osm->n; i++) {
567:     KSPSetOperators(osm->ksp[i], osm->pmat[i], osm->pmat[i]);
568:     KSPGetOptionsPrefix(osm->ksp[i], &prefix);
569:     MatSetOptionsPrefix(osm->pmat[i], prefix);
570:     if (!pc->setupcalled) KSPSetFromOptions(osm->ksp[i]);
571:   }
572:   if (osm->pcmat) {
573:     MatDestroy(&pc->pmat);
574:     pc->pmat   = osm->pcmat;
575:     osm->pcmat = NULL;
576:   }
577:   return 0;
578: }

580: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
581: {
582:   PC_GASM *osm = (PC_GASM *)pc->data;
583:   PetscInt i;

585:   for (i = 0; i < osm->n; i++) KSPSetUp(osm->ksp[i]);
586:   return 0;
587: }

589: static PetscErrorCode PCApply_GASM(PC pc, Vec xin, Vec yout)
590: {
591:   PC_GASM    *osm = (PC_GASM *)pc->data;
592:   PetscInt    i;
593:   Vec         x, y;
594:   ScatterMode forward = SCATTER_FORWARD, reverse = SCATTER_REVERSE;

596:   if (osm->pctoouter) {
597:     VecScatterBegin(osm->pctoouter, xin, osm->pcx, INSERT_VALUES, SCATTER_REVERSE);
598:     VecScatterEnd(osm->pctoouter, xin, osm->pcx, INSERT_VALUES, SCATTER_REVERSE);
599:     x = osm->pcx;
600:     y = osm->pcy;
601:   } else {
602:     x = xin;
603:     y = yout;
604:   }
605:   /*
606:      support for limiting the restriction or interpolation only to the inner
607:      subdomain values (leaving the other values 0).
608:   */
609:   if (!(osm->type & PC_GASM_RESTRICT)) {
610:     /* have to zero the work RHS since scatter may leave some slots empty */
611:     VecZeroEntries(osm->gx);
612:     VecScatterBegin(osm->girestriction, x, osm->gx, INSERT_VALUES, forward);
613:   } else {
614:     VecScatterBegin(osm->gorestriction, x, osm->gx, INSERT_VALUES, forward);
615:   }
616:   VecZeroEntries(osm->gy);
617:   if (!(osm->type & PC_GASM_RESTRICT)) {
618:     VecScatterEnd(osm->girestriction, x, osm->gx, INSERT_VALUES, forward);
619:   } else {
620:     VecScatterEnd(osm->gorestriction, x, osm->gx, INSERT_VALUES, forward);
621:   }
622:   /* do the subdomain solves */
623:   for (i = 0; i < osm->n; ++i) {
624:     KSPSolve(osm->ksp[i], osm->x[i], osm->y[i]);
625:     KSPCheckSolve(osm->ksp[i], pc, osm->y[i]);
626:   }
627:   /* do we need to zero y? */
628:   VecZeroEntries(y);
629:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
630:     VecScatterBegin(osm->girestriction, osm->gy, y, ADD_VALUES, reverse);
631:     VecScatterEnd(osm->girestriction, osm->gy, y, ADD_VALUES, reverse);
632:   } else {
633:     VecScatterBegin(osm->gorestriction, osm->gy, y, ADD_VALUES, reverse);
634:     VecScatterEnd(osm->gorestriction, osm->gy, y, ADD_VALUES, reverse);
635:   }
636:   if (osm->pctoouter) {
637:     VecScatterBegin(osm->pctoouter, y, yout, INSERT_VALUES, SCATTER_FORWARD);
638:     VecScatterEnd(osm->pctoouter, y, yout, INSERT_VALUES, SCATTER_FORWARD);
639:   }
640:   return 0;
641: }

643: static PetscErrorCode PCMatApply_GASM(PC pc, Mat Xin, Mat Yout)
644: {
645:   PC_GASM    *osm = (PC_GASM *)pc->data;
646:   Mat         X, Y, O = NULL, Z, W;
647:   Vec         x, y;
648:   PetscInt    i, m, M, N;
649:   ScatterMode forward = SCATTER_FORWARD, reverse = SCATTER_REVERSE;

652:   MatGetSize(Xin, NULL, &N);
653:   if (osm->pctoouter) {
654:     VecGetLocalSize(osm->pcx, &m);
655:     VecGetSize(osm->pcx, &M);
656:     MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]), m, PETSC_DECIDE, M, N, NULL, &O);
657:     for (i = 0; i < N; ++i) {
658:       MatDenseGetColumnVecRead(Xin, i, &x);
659:       MatDenseGetColumnVecWrite(O, i, &y);
660:       VecScatterBegin(osm->pctoouter, x, y, INSERT_VALUES, SCATTER_REVERSE);
661:       VecScatterEnd(osm->pctoouter, x, y, INSERT_VALUES, SCATTER_REVERSE);
662:       MatDenseRestoreColumnVecWrite(O, i, &y);
663:       MatDenseRestoreColumnVecRead(Xin, i, &x);
664:     }
665:     X = Y = O;
666:   } else {
667:     X = Xin;
668:     Y = Yout;
669:   }
670:   /*
671:      support for limiting the restriction or interpolation only to the inner
672:      subdomain values (leaving the other values 0).
673:   */
674:   VecGetLocalSize(osm->x[0], &m);
675:   VecGetSize(osm->x[0], &M);
676:   MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]), m, PETSC_DECIDE, M, N, NULL, &Z);
677:   for (i = 0; i < N; ++i) {
678:     MatDenseGetColumnVecRead(X, i, &x);
679:     MatDenseGetColumnVecWrite(Z, i, &y);
680:     if (!(osm->type & PC_GASM_RESTRICT)) {
681:       /* have to zero the work RHS since scatter may leave some slots empty */
682:       VecZeroEntries(y);
683:       VecScatterBegin(osm->girestriction, x, y, INSERT_VALUES, forward);
684:       VecScatterEnd(osm->girestriction, x, y, INSERT_VALUES, forward);
685:     } else {
686:       VecScatterBegin(osm->gorestriction, x, y, INSERT_VALUES, forward);
687:       VecScatterEnd(osm->gorestriction, x, y, INSERT_VALUES, forward);
688:     }
689:     MatDenseRestoreColumnVecWrite(Z, i, &y);
690:     MatDenseRestoreColumnVecRead(X, i, &x);
691:   }
692:   MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]), m, PETSC_DECIDE, M, N, NULL, &W);
693:   MatSetOption(Z, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
694:   MatAssemblyBegin(Z, MAT_FINAL_ASSEMBLY);
695:   MatAssemblyEnd(Z, MAT_FINAL_ASSEMBLY);
696:   /* do the subdomain solve */
697:   KSPMatSolve(osm->ksp[0], Z, W);
698:   KSPCheckSolve(osm->ksp[0], pc, NULL);
699:   MatDestroy(&Z);
700:   /* do we need to zero y? */
701:   MatZeroEntries(Y);
702:   for (i = 0; i < N; ++i) {
703:     MatDenseGetColumnVecWrite(Y, i, &y);
704:     MatDenseGetColumnVecRead(W, i, &x);
705:     if (!(osm->type & PC_GASM_INTERPOLATE)) {
706:       VecScatterBegin(osm->girestriction, x, y, ADD_VALUES, reverse);
707:       VecScatterEnd(osm->girestriction, x, y, ADD_VALUES, reverse);
708:     } else {
709:       VecScatterBegin(osm->gorestriction, x, y, ADD_VALUES, reverse);
710:       VecScatterEnd(osm->gorestriction, x, y, ADD_VALUES, reverse);
711:     }
712:     MatDenseRestoreColumnVecRead(W, i, &x);
713:     if (osm->pctoouter) {
714:       MatDenseGetColumnVecWrite(Yout, i, &x);
715:       VecScatterBegin(osm->pctoouter, y, x, INSERT_VALUES, SCATTER_FORWARD);
716:       VecScatterEnd(osm->pctoouter, y, x, INSERT_VALUES, SCATTER_FORWARD);
 717:       MatDenseRestoreColumnVecWrite(Yout, i, &x);
718:     }
719:     MatDenseRestoreColumnVecWrite(Y, i, &y);
720:   }
721:   MatDestroy(&W);
722:   MatDestroy(&O);
723:   return 0;
724: }

726: static PetscErrorCode PCApplyTranspose_GASM(PC pc, Vec xin, Vec yout)
727: {
728:   PC_GASM    *osm = (PC_GASM *)pc->data;
729:   PetscInt    i;
730:   Vec         x, y;
731:   ScatterMode forward = SCATTER_FORWARD, reverse = SCATTER_REVERSE;

733:   if (osm->pctoouter) {
734:     VecScatterBegin(osm->pctoouter, xin, osm->pcx, INSERT_VALUES, SCATTER_REVERSE);
735:     VecScatterEnd(osm->pctoouter, xin, osm->pcx, INSERT_VALUES, SCATTER_REVERSE);
736:     x = osm->pcx;
737:     y = osm->pcy;
738:   } else {
739:     x = xin;
740:     y = yout;
741:   }
742:   /*
743:      Support for limiting the restriction or interpolation to only local
744:      subdomain values (leaving the other values 0).

746:      Note: these are reversed from the PCApply_GASM() because we are applying the
747:      transpose of the three terms
748:   */
749:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
750:     /* have to zero the work RHS since scatter may leave some slots empty */
751:     VecZeroEntries(osm->gx);
752:     VecScatterBegin(osm->girestriction, x, osm->gx, INSERT_VALUES, forward);
753:   } else {
754:     VecScatterBegin(osm->gorestriction, x, osm->gx, INSERT_VALUES, forward);
755:   }
756:   VecZeroEntries(osm->gy);
757:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
758:     VecScatterEnd(osm->girestriction, x, osm->gx, INSERT_VALUES, forward);
759:   } else {
760:     VecScatterEnd(osm->gorestriction, x, osm->gx, INSERT_VALUES, forward);
761:   }
762:   /* do the local solves */
763:   for (i = 0; i < osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
764:     KSPSolveTranspose(osm->ksp[i], osm->x[i], osm->y[i]);
765:     KSPCheckSolve(osm->ksp[i], pc, osm->y[i]);
766:   }
767:   VecZeroEntries(y);
768:   if (!(osm->type & PC_GASM_RESTRICT)) {
769:     VecScatterBegin(osm->girestriction, osm->gy, y, ADD_VALUES, reverse);
770:     VecScatterEnd(osm->girestriction, osm->gy, y, ADD_VALUES, reverse);
771:   } else {
772:     VecScatterBegin(osm->gorestriction, osm->gy, y, ADD_VALUES, reverse);
773:     VecScatterEnd(osm->gorestriction, osm->gy, y, ADD_VALUES, reverse);
774:   }
775:   if (osm->pctoouter) {
776:     VecScatterBegin(osm->pctoouter, y, yout, INSERT_VALUES, SCATTER_FORWARD);
777:     VecScatterEnd(osm->pctoouter, y, yout, INSERT_VALUES, SCATTER_FORWARD);
778:   }
779:   return 0;
780: }

782: static PetscErrorCode PCReset_GASM(PC pc)
783: {
784:   PC_GASM *osm = (PC_GASM *)pc->data;
785:   PetscInt i;

787:   if (osm->ksp) {
788:     for (i = 0; i < osm->n; i++) KSPReset(osm->ksp[i]);
789:   }
790:   if (osm->pmat) {
791:     if (osm->n > 0) {
792:       PetscMPIInt size;
793:       MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
794:       if (size > 1) {
795:         /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
796:         MatDestroyMatrices(osm->n, &osm->pmat);
797:       } else {
798:         MatDestroySubMatrices(osm->n, &osm->pmat);
799:       }
800:     }
801:   }
802:   if (osm->x) {
803:     for (i = 0; i < osm->n; i++) {
804:       VecDestroy(&osm->x[i]);
805:       VecDestroy(&osm->y[i]);
806:     }
807:   }
808:   VecDestroy(&osm->gx);
809:   VecDestroy(&osm->gy);

811:   VecScatterDestroy(&osm->gorestriction);
812:   VecScatterDestroy(&osm->girestriction);
813:   if (!osm->user_subdomains) {
814:     PCGASMDestroySubdomains(osm->n, &osm->ois, &osm->iis);
815:     osm->N    = PETSC_DETERMINE;
816:     osm->nmax = PETSC_DETERMINE;
817:   }
818:   if (osm->pctoouter) VecScatterDestroy(&(osm->pctoouter));
819:   if (osm->permutationIS) ISDestroy(&(osm->permutationIS));
820:   if (osm->pcx) VecDestroy(&(osm->pcx));
821:   if (osm->pcy) VecDestroy(&(osm->pcy));
822:   if (osm->permutationP) MatDestroy(&(osm->permutationP));
823:   if (osm->pcmat) MatDestroy(&osm->pcmat);
824:   return 0;
825: }

827: static PetscErrorCode PCDestroy_GASM(PC pc)
828: {
829:   PC_GASM *osm = (PC_GASM *)pc->data;
830:   PetscInt i;

832:   PCReset_GASM(pc);
833:   /* PCReset will not destroy subdomains, if user_subdomains is true. */
834:   PCGASMDestroySubdomains(osm->n, &osm->ois, &osm->iis);
835:   if (osm->ksp) {
836:     for (i = 0; i < osm->n; i++) KSPDestroy(&osm->ksp[i]);
837:     PetscFree(osm->ksp);
838:   }
839:   PetscFree(osm->x);
840:   PetscFree(osm->y);
841:   PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetSubdomains_C", NULL);
842:   PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetOverlap_C", NULL);
843:   PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetType_C", NULL);
844:   PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetSortIndices_C", NULL);
845:   PetscObjectComposeFunction((PetscObject)pc, "PCGASMGetSubKSP_C", NULL);
846:   PetscFree(pc->data);
847:   return 0;
848: }

850: static PetscErrorCode PCSetFromOptions_GASM(PC pc, PetscOptionItems *PetscOptionsObject)
851: {
852:   PC_GASM   *osm = (PC_GASM *)pc->data;
853:   PetscInt   blocks, ovl;
854:   PetscBool  flg;
855:   PCGASMType gasmtype;

857:   PetscOptionsHeadBegin(PetscOptionsObject, "Generalized additive Schwarz options");
858:   PetscOptionsBool("-pc_gasm_use_dm_subdomains", "If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.", "PCGASMSetUseDMSubdomains", osm->dm_subdomains, &osm->dm_subdomains, &flg);
859:   PetscOptionsInt("-pc_gasm_total_subdomains", "Total number of subdomains across communicator", "PCGASMSetTotalSubdomains", osm->N, &blocks, &flg);
860:   if (flg) PCGASMSetTotalSubdomains(pc, blocks);
861:   PetscOptionsInt("-pc_gasm_overlap", "Number of overlapping degrees of freedom", "PCGASMSetOverlap", osm->overlap, &ovl, &flg);
862:   if (flg) {
863:     PCGASMSetOverlap(pc, ovl);
864:     osm->dm_subdomains = PETSC_FALSE;
865:   }
866:   flg = PETSC_FALSE;
867:   PetscOptionsEnum("-pc_gasm_type", "Type of restriction/extension", "PCGASMSetType", PCGASMTypes, (PetscEnum)osm->type, (PetscEnum *)&gasmtype, &flg);
868:   if (flg) PCGASMSetType(pc, gasmtype);
869:   PetscOptionsBool("-pc_gasm_use_hierachical_partitioning", "use hierarchical partitioning", NULL, osm->hierarchicalpartitioning, &osm->hierarchicalpartitioning, &flg);
870:   PetscOptionsHeadEnd();
871:   return 0;
872: }

874: /*------------------------------------------------------------------------------------*/

876: /*@
 877:     PCGASMSetTotalSubdomains - Sets the total number of subdomains to use across the communicator for `PCGASM`

879:     Logically Collective

881:     Input Parameters:
882: +   pc  - the preconditioner
883: -   N   - total number of subdomains
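
    Example Usage:
    A minimal sketch; the `KSP` named ksp is assumed to already exist and wrap the linear system:
.vb
    PC pc;
    KSPGetPC(ksp, &pc);
    PCSetType(pc, PCGASM);
    PCGASMSetTotalSubdomains(pc, 4); /* four subdomains shared across the communicator */
.ve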

885:     Level: beginner

 887: .seealso: `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMSetOverlap()`,
888:           `PCGASMCreateSubdomains2D()`
889: @*/
890: PetscErrorCode PCGASMSetTotalSubdomains(PC pc, PetscInt N)
891: {
892:   PC_GASM    *osm = (PC_GASM *)pc->data;
893:   PetscMPIInt size, rank;


898:   PCGASMDestroySubdomains(osm->n, &osm->iis, &osm->ois);
899:   osm->ois = osm->iis = NULL;

901:   MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
902:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
903:   osm->N             = N;
904:   osm->n             = PETSC_DETERMINE;
905:   osm->nmax          = PETSC_DETERMINE;
906:   osm->dm_subdomains = PETSC_FALSE;
907:   return 0;
908: }

910: static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc, PetscInt n, IS iis[], IS ois[])
911: {
912:   PC_GASM *osm = (PC_GASM *)pc->data;
913:   PetscInt i;


918:   PCGASMDestroySubdomains(osm->n, &osm->iis, &osm->ois);
919:   osm->iis = osm->ois = NULL;
920:   osm->n              = n;
921:   osm->N              = PETSC_DETERMINE;
922:   osm->nmax           = PETSC_DETERMINE;
923:   if (ois) {
924:     PetscMalloc1(n, &osm->ois);
925:     for (i = 0; i < n; i++) {
926:       PetscObjectReference((PetscObject)ois[i]);
927:       osm->ois[i] = ois[i];
928:     }
929:     /*
930:        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
931:        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
932:     */
933:     osm->overlap = -1;
934:     /* inner subdomains must be provided  */
936:   } /* end if */
937:   if (iis) {
938:     PetscMalloc1(n, &osm->iis);
939:     for (i = 0; i < n; i++) {
940:       PetscObjectReference((PetscObject)iis[i]);
941:       osm->iis[i] = iis[i];
942:     }
943:     if (!ois) {
944:       osm->ois = NULL;
 945:       /* if the user does not provide outer indices, the corresponding outer indices are created in PCSetUp_GASM by expanding the inner ones by osm->overlap */
946:     }
947:   }
948:   if (PetscDefined(USE_DEBUG)) {
949:     PetscInt        j, rstart, rend, *covered, lsize;
950:     const PetscInt *indices;
951:     /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
952:     MatGetOwnershipRange(pc->pmat, &rstart, &rend);
953:     PetscCalloc1(rend - rstart, &covered);
 954:     /* mark the locally owned rows covered by the inner subdomains; indices owned by other ranks are not allowed here */
955:     for (i = 0; i < n; i++) {
956:       ISGetIndices(osm->iis[i], &indices);
957:       ISGetLocalSize(osm->iis[i], &lsize);
958:       for (j = 0; j < lsize; j++) {
961:         covered[indices[j] - rstart] = 1;
962:       }
963:       ISRestoreIndices(osm->iis[i], &indices);
964:     }
965:     /* check if we miss any indices */
967:     PetscFree(covered);
968:   }
969:   if (iis) osm->user_subdomains = PETSC_TRUE;
970:   return 0;
971: }

973: static PetscErrorCode PCGASMSetOverlap_GASM(PC pc, PetscInt ovl)
974: {
975:   PC_GASM *osm = (PC_GASM *)pc->data;

979:   if (!pc->setupcalled) osm->overlap = ovl;
980:   return 0;
981: }

983: static PetscErrorCode PCGASMSetType_GASM(PC pc, PCGASMType type)
984: {
985:   PC_GASM *osm = (PC_GASM *)pc->data;

987:   osm->type     = type;
988:   osm->type_set = PETSC_TRUE;
989:   return 0;
990: }

992: static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc, PetscBool doSort)
993: {
994:   PC_GASM *osm = (PC_GASM *)pc->data;

996:   osm->sort_indices = doSort;
997:   return 0;
998: }

1000: /*
1001:    FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1002:         In particular, it would upset the global subdomain number calculation.
1003: */
1004: static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc, PetscInt *n, PetscInt *first, KSP **ksp)
1005: {
1006:   PC_GASM *osm = (PC_GASM *)pc->data;


1010:   if (n) *n = osm->n;
1011:   if (first) {
1012:     MPI_Scan(&osm->n, first, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));
1013:     *first -= osm->n;
1014:   }
1015:   if (ksp) {
1016:     /* Assume that local solves are now different; not necessarily
1017:        true, though!  This flag is used only for PCView_GASM() */
1018:     *ksp                        = osm->ksp;
1019:     osm->same_subdomain_solvers = PETSC_FALSE;
1020:   }
1021:   return 0;
1022: } /* PCGASMGetSubKSP_GASM() */

1024: /*@C
1025:     PCGASMSetSubdomains - Sets the subdomains for this MPI rank
1026:     for the generalized additive Schwarz preconditioner `PCGASM`, which allows multiple MPI ranks per subdomain

1028:     Collective

1030:     Input Parameters:
1031: +   pc  - the preconditioner object
1032: .   n   - the number of subdomains for this MPI rank
1033: .   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1034: -   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)

1036:     Notes:
1037:     The `IS` indices use the parallel, global numbering of the vector entries.
1038:     Inner subdomains are those where the correction is applied.
1039:     Outer subdomains are those where the residual necessary to obtain the
1040:     corrections is obtained (see `PCGASMType` for the use of inner/outer subdomains).
1041:     Both inner and outer subdomains can extend over several MPI ranks.
1042:     This rank's portion of a subdomain is known as a local subdomain.

1044:     Inner subdomains cannot overlap with each other, must not contain any indices owned by remote ranks,
1045:     and have to cover the entire locally owned portion of the matrix. The index sets on each
1046:     rank should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1047:     on another MPI rank.

1049:     By default the `PCGASM` preconditioner uses 1 (local) subdomain per MPI rank.
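
    Example Usage:
    A sketch of supplying one inner subdomain per rank made up of the locally owned rows. This merely
    reproduces the default decomposition and only illustrates the call sequence; the Mat A and the PC pc
    are assumed to come from the caller:
.vb
    IS       iis;
    PetscInt first, last;
    MatGetOwnershipRange(A, &first, &last);
    ISCreateStride(PETSC_COMM_SELF, last - first, first, 1, &iis);
    PCGASMSetSubdomains(pc, 1, &iis, NULL);
    ISDestroy(&iis); /* the preconditioner keeps its own reference to the index set */
.ve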

1051:     Level: advanced

1053: .seealso: `PCGASM`, `PCGASMSetOverlap()`, `PCGASMGetSubKSP()`,
1054:           `PCGASMCreateSubdomains2D()`, `PCGASMGetSubdomains()`
1055: @*/
1056: PetscErrorCode PCGASMSetSubdomains(PC pc, PetscInt n, IS iis[], IS ois[])
1057: {
1058:   PC_GASM *osm = (PC_GASM *)pc->data;

1061:   PetscTryMethod(pc, "PCGASMSetSubdomains_C", (PC, PetscInt, IS[], IS[]), (pc, n, iis, ois));
1062:   osm->dm_subdomains = PETSC_FALSE;
1063:   return 0;
1064: }

1066: /*@
1067:     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1068:     additive Schwarz preconditioner `PCGASM`.  Either all or no MPI ranks in the
1069:     pc communicator must call this routine.

1071:     Logically Collective

1073:     Input Parameters:
1074: +   pc  - the preconditioner context
1075: -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)

1077:     Options Database Key:
1078: .   -pc_gasm_overlap <overlap> - Sets overlap

1080:     Notes:
1081:     By default the `PCGASM` preconditioner uses 1 subdomain per rank.  To use
1082:     multiple subdomains per rank or "straddling" subdomains that intersect
1083:     multiple ranks use `PCGASMSetSubdomains()` (or option -pc_gasm_total_subdomains <n>).

1085:     The overlap defaults to 0, so if one desires that no additional
1086:     overlap be computed beyond what may have been set with a call to
1087:     `PCGASMSetSubdomains()`, then ovl must be set to be 0.  In particular, if one does
1088:     not explicitly set the subdomains in application code, then all overlap would be computed
1089:     internally by PETSc, and using an overlap of 0 would result in a `PCGASM`
1090:     variant that is equivalent to the block Jacobi preconditioner.

1092:     One can define initial index sets with any overlap via
1093:     `PCGASMSetSubdomains()`; the routine `PCGASMSetOverlap()` merely allows
1094:     PETSc to extend that overlap further, if desired.
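
    Example Usage:
    A sketch combining a requested total number of subdomains with additional overlap; pc is assumed
    to already be of type `PCGASM`:
.vb
    PCGASMSetTotalSubdomains(pc, 8);
    PCGASMSetOverlap(pc, 2); /* extend each subdomain by two layers of matrix connectivity */
.ve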

1096:     Level: intermediate

1098: .seealso: `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMGetSubKSP()`,
1099:           `PCGASMCreateSubdomains2D()`, `PCGASMGetSubdomains()`
1100: @*/
1101: PetscErrorCode PCGASMSetOverlap(PC pc, PetscInt ovl)
1102: {
1103:   PC_GASM *osm = (PC_GASM *)pc->data;

1107:   PetscTryMethod(pc, "PCGASMSetOverlap_C", (PC, PetscInt), (pc, ovl));
1108:   osm->dm_subdomains = PETSC_FALSE;
1109:   return 0;
1110: }

1112: /*@
1113:     PCGASMSetType - Sets the type of restriction and interpolation used
1114:     for local problems in the `PCGASM` additive Schwarz method.

1116:     Logically Collective

1118:     Input Parameters:
1119: +   pc  - the preconditioner context
1120: -   type - variant of `PCGASM`, one of
1121: .vb
1122:       `PC_GASM_BASIC`       - full interpolation and restriction
1123:       `PC_GASM_RESTRICT`    - full restriction, local MPI rank interpolation
1124:       `PC_GASM_INTERPOLATE` - full interpolation, local MPI rank restriction
1125:       `PC_GASM_NONE`        - local MPI rank restriction and interpolation
1126: .ve

1128:     Options Database Key:
1129: .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
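
    Example Usage:
    A one-line sketch; pc is assumed to already be of type `PCGASM`:
.vb
    PCGASMSetType(pc, PC_GASM_BASIC);
.ve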

1131:     Level: intermediate

1133: .seealso: `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMGetSubKSP()`,
1134:           `PCGASMCreateSubdomains2D()`, `PCASM`, `PCASMSetType()`
1135: @*/
1136: PetscErrorCode PCGASMSetType(PC pc, PCGASMType type)
1137: {
1140:   PetscTryMethod(pc, "PCGASMSetType_C", (PC, PCGASMType), (pc, type));
1141:   return 0;
1142: }

1144: /*@
1145:     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

1147:     Logically Collective

1149:     Input Parameters:
1150: +   pc  - the preconditioner context
1151: -   doSort - sort the subdomain indices

1153:     Level: intermediate

1155: .seealso: `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMGetSubKSP()`,
1156:           `PCGASMCreateSubdomains2D()`
1157: @*/
1158: PetscErrorCode PCGASMSetSortIndices(PC pc, PetscBool doSort)
1159: {
1162:   PetscTryMethod(pc, "PCGASMSetSortIndices_C", (PC, PetscBool), (pc, doSort));
1163:   return 0;
1164: }

1166: /*@C
1167:    PCGASMGetSubKSP - Gets the local `KSP` contexts for all subdomains on
1168:    this MPI rank.

1170:    Collective iff first_local is requested

1172:    Input Parameter:
1173: .  pc - the preconditioner context

1175:    Output Parameters:
1176: +  n_local - the number of blocks on this MPI rank or NULL
1177: .  first_local - the global number of the first block on this rank or NULL,
1178:                  all ranks must request or all must pass NULL
1179: -  ksp - the array of `KSP` contexts

1181:    Note:
1182:    After `PCGASMGetSubKSP()` the array of `KSP`s must not be freed by the caller

1184:    Currently for some matrix implementations only 1 block per MPI rank
1185:    is supported.

1187:    You must call `KSPSetUp()` before calling `PCGASMGetSubKSP()`.
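
   Example Usage:
   A sketch of customizing every local subdomain solver; ksp and its `PCGASM` preconditioner pc are
   assumed to come from the caller:
.vb
   KSP     *subksp;
   PetscInt i, nlocal;
   KSPSetUp(ksp); /* the outer solver must be set up first */
   PCGASMGetSubKSP(pc, &nlocal, NULL, &subksp);
   for (i = 0; i < nlocal; i++) {
     PC subpc;
     KSPGetPC(subksp[i], &subpc);
     PCSetType(subpc, PCLU); /* e.g., a direct solve on each subdomain */
   }
.ve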

1189:    Level: advanced

1191: .seealso: `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMSetOverlap()`,
1192:           `PCGASMCreateSubdomains2D()`,
1193: @*/
1194: PetscErrorCode PCGASMGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
1195: {
1197:   PetscUseMethod(pc, "PCGASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
1198:   return 0;
1199: }

1201: /*MC
1202:    PCGASM - Use the (restricted) additive Schwarz method, where each block is (approximately) solved with
1203:            its own `KSP` object on a subset of MPI ranks

1205:    Options Database Keys:
1206: +  -pc_gasm_total_subdomains <n>  - Sets the total number of subdomains to be distributed across the MPI ranks
1207: .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in `PCView()`, -ksp_view or -snes_view
1208: .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in `PCSetUp()`
1209: .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1210: -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets `PCGASMType`

1212:    Notes:
1213:    To set options on the solvers for each block append -sub_ to all the `KSP` and `PC`
1214:    options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

1216:    To set the options on the solvers separately for each block call `PCGASMGetSubKSP()`
1217:    and set the options directly on the resulting `KSP` object (you can access its `PC`
1218:    with `KSPGetPC()`)
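
   Example Usage:
   A typical command line sketch; the executable name ./myapp is hypothetical, and the option names are
   the ones listed above:
.vb
   mpiexec -n 8 ./myapp -pc_type gasm -pc_gasm_total_subdomains 4 -pc_gasm_overlap 1 \
           -sub_ksp_type preonly -sub_pc_type ilu -ksp_view
.ve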

1220:    Level: beginner

1222:     References:
1223: +   * - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1224:      Courant Institute, New York University Technical report
1225: -   * - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1226:     Cambridge University Press.

1228: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCASM`, `PCGASMType`, `PCGASMSetType()`,
1229:           `PCBJACOBI`, `PCGASMGetSubKSP()`, `PCGASMSetSubdomains()`,
1230:           `PCSetModifySubMatrices()`, `PCGASMSetOverlap()`, `PCGASMSetType()`
1231: M*/

1233: PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1234: {
1235:   PC_GASM *osm;

1237:   PetscNew(&osm);

1239:   osm->N                        = PETSC_DETERMINE;
1240:   osm->n                        = PETSC_DECIDE;
1241:   osm->nmax                     = PETSC_DETERMINE;
1242:   osm->overlap                  = 0;
1243:   osm->ksp                      = NULL;
1244:   osm->gorestriction            = NULL;
1245:   osm->girestriction            = NULL;
1246:   osm->pctoouter                = NULL;
1247:   osm->gx                       = NULL;
1248:   osm->gy                       = NULL;
1249:   osm->x                        = NULL;
1250:   osm->y                        = NULL;
1251:   osm->pcx                      = NULL;
1252:   osm->pcy                      = NULL;
1253:   osm->permutationIS            = NULL;
1254:   osm->permutationP             = NULL;
1255:   osm->pcmat                    = NULL;
1256:   osm->ois                      = NULL;
1257:   osm->iis                      = NULL;
1258:   osm->pmat                     = NULL;
1259:   osm->type                     = PC_GASM_RESTRICT;
1260:   osm->same_subdomain_solvers   = PETSC_TRUE;
1261:   osm->sort_indices             = PETSC_TRUE;
1262:   osm->dm_subdomains            = PETSC_FALSE;
1263:   osm->hierarchicalpartitioning = PETSC_FALSE;

1265:   pc->data                 = (void *)osm;
1266:   pc->ops->apply           = PCApply_GASM;
1267:   pc->ops->matapply        = PCMatApply_GASM;
1268:   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1269:   pc->ops->setup           = PCSetUp_GASM;
1270:   pc->ops->reset           = PCReset_GASM;
1271:   pc->ops->destroy         = PCDestroy_GASM;
1272:   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1273:   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1274:   pc->ops->view            = PCView_GASM;
1275:   pc->ops->applyrichardson = NULL;

1277:   PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetSubdomains_C", PCGASMSetSubdomains_GASM);
1278:   PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetOverlap_C", PCGASMSetOverlap_GASM);
1279:   PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetType_C", PCGASMSetType_GASM);
1280:   PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetSortIndices_C", PCGASMSetSortIndices_GASM);
1281:   PetscObjectComposeFunction((PetscObject)pc, "PCGASMGetSubKSP_C", PCGASMGetSubKSP_GASM);
1282:   return 0;
1283: }

1285: PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1286: {
1287:   MatPartitioning mpart;
1288:   const char     *prefix;
1289:   PetscInt        i, j, rstart, rend, bs;
1290:   PetscBool       hasop, isbaij = PETSC_FALSE, foundpart = PETSC_FALSE;
1291:   Mat             Ad = NULL, adj;
1292:   IS              ispart, isnumb, *is;


1296:   /* Get prefix, row distribution, and block size */
1297:   MatGetOptionsPrefix(A, &prefix);
1298:   MatGetOwnershipRange(A, &rstart, &rend);
1299:   MatGetBlockSize(A, &bs);

1302:   /* Get diagonal block from matrix if possible */
1303:   MatHasOperation(A, MATOP_GET_DIAGONAL_BLOCK, &hasop);
1304:   if (hasop) MatGetDiagonalBlock(A, &Ad);
1305:   if (Ad) {
1306:     PetscObjectBaseTypeCompare((PetscObject)Ad, MATSEQBAIJ, &isbaij);
1307:     if (!isbaij) PetscObjectBaseTypeCompare((PetscObject)Ad, MATSEQSBAIJ, &isbaij);
1308:   }
1309:   if (Ad && nloc > 1) {
1310:     PetscBool match, done;
1311:     /* Try to setup a good matrix partitioning if available */
1312:     MatPartitioningCreate(PETSC_COMM_SELF, &mpart);
1313:     PetscObjectSetOptionsPrefix((PetscObject)mpart, prefix);
1314:     MatPartitioningSetFromOptions(mpart);
1315:     PetscObjectTypeCompare((PetscObject)mpart, MATPARTITIONINGCURRENT, &match);
1316:     if (!match) PetscObjectTypeCompare((PetscObject)mpart, MATPARTITIONINGSQUARE, &match);
1317:     if (!match) { /* assume a "good" partitioner is available */
1318:       PetscInt        na;
1319:       const PetscInt *ia, *ja;
1320:       MatGetRowIJ(Ad, 0, PETSC_TRUE, isbaij, &na, &ia, &ja, &done);
1321:       if (done) {
1322:         /* Build adjacency matrix by hand. Unfortunately a call to
1323:            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1324:            remove the block-aij structure and we cannot expect
1325:            MatPartitioning to split vertices as we need */
1326:         PetscInt        i, j, len, nnz, cnt, *iia = NULL, *jja = NULL;
1327:         const PetscInt *row;
1328:         nnz = 0;
1329:         for (i = 0; i < na; i++) { /* count number of nonzeros */
1330:           len = ia[i + 1] - ia[i];
1331:           row = ja + ia[i];
1332:           for (j = 0; j < len; j++) {
1333:             if (row[j] == i) { /* don't count diagonal */
1334:               len--;
1335:               break;
1336:             }
1337:           }
1338:           nnz += len;
1339:         }
1340:         PetscMalloc1(na + 1, &iia);
1341:         PetscMalloc1(nnz, &jja);
1342:         nnz    = 0;
1343:         iia[0] = 0;
1344:         for (i = 0; i < na; i++) { /* fill adjacency */
1345:           cnt = 0;
1346:           len = ia[i + 1] - ia[i];
1347:           row = ja + ia[i];
1348:           for (j = 0; j < len; j++) {
1349:             if (row[j] != i) jja[nnz + cnt++] = row[j]; /* if not diagonal */
1350:           }
1351:           nnz += cnt;
1352:           iia[i + 1] = nnz;
1353:         }
1354:         /* Partitioning of the adjacency matrix */
1355:         MatCreateMPIAdj(PETSC_COMM_SELF, na, na, iia, jja, NULL, &adj);
1356:         MatPartitioningSetAdjacency(mpart, adj);
1357:         MatPartitioningSetNParts(mpart, nloc);
1358:         MatPartitioningApply(mpart, &ispart);
1359:         ISPartitioningToNumbering(ispart, &isnumb);
1360:         MatDestroy(&adj);
1361:         foundpart = PETSC_TRUE;
1362:       }
1363:       MatRestoreRowIJ(Ad, 0, PETSC_TRUE, isbaij, &na, &ia, &ja, &done);
1364:     }
1365:     MatPartitioningDestroy(&mpart);
1366:   }
1367:   PetscMalloc1(nloc, &is);
1368:   if (!foundpart) {
1369:     /* Partitioning by contiguous chunks of rows */

1371:     PetscInt mbs   = (rend - rstart) / bs;
1372:     PetscInt start = rstart;
1373:     for (i = 0; i < nloc; i++) {
1374:       PetscInt count = (mbs / nloc + ((mbs % nloc) > i)) * bs;
1375:       ISCreateStride(PETSC_COMM_SELF, count, start, 1, &is[i]);
1376:       start += count;
1377:     }

1379:   } else {
1380:     /* Partitioning by adjacency of diagonal block  */

1382:     const PetscInt *numbering;
1383:     PetscInt       *count, nidx, *indices, *newidx, start = 0;
1384:     /* Get node count in each partition */
1385:     PetscMalloc1(nloc, &count);
1386:     ISPartitioningCount(ispart, nloc, count);
1387:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1388:       for (i = 0; i < nloc; i++) count[i] *= bs;
1389:     }
1390:     /* Build indices from node numbering */
1391:     ISGetLocalSize(isnumb, &nidx);
1392:     PetscMalloc1(nidx, &indices);
1393:     for (i = 0; i < nidx; i++) indices[i] = i; /* needs to be initialized */
1394:     ISGetIndices(isnumb, &numbering);
1395:     PetscSortIntWithPermutation(nidx, numbering, indices);
1396:     ISRestoreIndices(isnumb, &numbering);
1397:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1398:       PetscMalloc1(nidx * bs, &newidx);
1399:       for (i = 0; i < nidx; i++) {
1400:         for (j = 0; j < bs; j++) newidx[i * bs + j] = indices[i] * bs + j;
1401:       }
1402:       PetscFree(indices);
1403:       nidx *= bs;
1404:       indices = newidx;
1405:     }
1406:     /* Shift to get global indices */
1407:     for (i = 0; i < nidx; i++) indices[i] += rstart;

1409:     /* Build the index sets for each block */
1410:     for (i = 0; i < nloc; i++) {
1411:       ISCreateGeneral(PETSC_COMM_SELF, count[i], &indices[start], PETSC_COPY_VALUES, &is[i]);
1412:       ISSort(is[i]);
1413:       start += count[i];
1414:     }

1416:     PetscFree(count);
1417:     PetscFree(indices);
1418:     ISDestroy(&isnumb);
1419:     ISDestroy(&ispart);
1420:   }
1421:   *iis = is;
1422:   return 0;
1423: }

1425: PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A, PetscInt N, PetscInt *n, IS *iis[])
1426: {
1427:   MatSubdomainsCreateCoalesce(A, N, n, iis);
1428:   return 0;
1429: }

1431: /*@C
1432:    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the `PCGASM` additive
1433:    Schwarz preconditioner for any problem based on its matrix.

1435:    Collective

1437:    Input Parameters:
1438: +  A       - The global matrix operator
1439: -  N       - the number of global subdomains requested

1441:    Output Parameters:
1442: +  n   - the number of subdomains created on this MPI rank
1443: -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)

1445:    Level: advanced

1447:    Note:
1448:    When N >= A's communicator size, each subdomain is local, i.e. contained within a single MPI rank.
1449:    When N < size, the subdomains straddle rank boundaries and are no longer local.
1450:    The resulting subdomains can be used in `PCGASMSetSubdomains`(pc,n,iis,NULL), as sketched below.  The overlapping
1451:    outer subdomains will be automatically generated from these according to the requested amount of
1452:    overlap; this is currently supported only with local subdomains.
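
   A minimal sketch of the intended call sequence (error checking omitted; the matrix A and the
   `PCGASM` preconditioner pc are assumed to exist already, and the request for 4 subdomains is
   illustrative only):
.vb
   PetscInt n;
   IS      *iis;

   PCGASMCreateSubdomains(A, 4, &n, &iis);  /* 4 global nonoverlapping subdomains        */
   PCGASMSetSubdomains(pc, n, iis, NULL);   /* outer subdomains generated from overlap   */
   PCGASMDestroySubdomains(n, &iis, NULL);  /* safe after PCGASMSetSubdomains(), see the
                                               PCGASMDestroySubdomains() manual page below */
.ve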

1454: .seealso: `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMDestroySubdomains()`
1455: @*/
1456: PetscErrorCode PCGASMCreateSubdomains(Mat A, PetscInt N, PetscInt *n, IS *iis[])
1457: {
1458:   PetscMPIInt size;


1464:   MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
1465:   if (N >= size) {
1466:     *n = N / size + (N % size);
1467:     PCGASMCreateLocalSubdomains(A, *n, iis);
1468:   } else {
1469:     PCGASMCreateStraddlingSubdomains(A, N, n, iis);
1470:   }
1471:   return 0;
1472: }

1474: /*@C
1475:    PCGASMDestroySubdomains - Destroys the index sets created with
1476:    `PCGASMCreateSubdomains()` or `PCGASMCreateSubdomains2D()`. Should be
1477:    called after setting subdomains with `PCGASMSetSubdomains()`.

1479:    Collective

1481:    Input Parameters:
1482: +  n   - the number of index sets
1483: .  iis - the array of inner subdomains
1484: -  ois - the array of outer subdomains, can be NULL

1486:    Level: intermediate

1488:    Note:
1489:    This is a convenience subroutine that walks each list,
1490:    destroys each `IS` on the list, and then frees the list. At the end the
1491:    list pointers are set to NULL.

1493: .seealso: `PCGASM`, `PCGASMCreateSubdomains()`, `PCGASMSetSubdomains()`
1494: @*/
1495: PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS **iis, IS **ois)
1496: {
1497:   PetscInt i;

1499:   if (n <= 0) return 0;
1500:   if (ois) {
1502:     if (*ois) {
1504:       for (i = 0; i < n; i++) ISDestroy(&(*ois)[i]);
1505:       PetscFree((*ois));
1506:     }
1507:   }
1508:   if (iis) {
1510:     if (*iis) {
1512:       for (i = 0; i < n; i++) ISDestroy(&(*iis)[i]);
1513:       PetscFree((*iis));
1514:     }
1515:   }
1516:   return 0;
1517: }

1519: #define PCGASMLocalSubdomainBounds2D(M, N, xleft, ylow, xright, yhigh, first, last, xleft_loc, ylow_loc, xright_loc, yhigh_loc, n) \
1520:   { \
1521:     PetscInt first_row = first / M, last_row = last / M + 1; \
1522:     /*                                                                                                    \
1523:      Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1524:      of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1525:      subdomain).                                                                                          \
1526:      Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1527:      of the intersection.                                                                                 \
1528:     */ \
1529:     /* ylow_loc is the grid row containing the first element of the local subdomain */ \
1530:     *ylow_loc = PetscMax(first_row, ylow); \
1531:     /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1532:     *xleft_loc = *ylow_loc == first_row ? PetscMax(first % M, xleft) : xleft; \
1533:     /* yhigh_loc is the grid row above the last local subdomain element */ \
1534:     *yhigh_loc = PetscMin(last_row, yhigh); \
1535:     /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1536:     *xright_loc = *yhigh_loc == last_row ? PetscMin(xright, last % M) : xright; \
1537:     /* Now compute the size of the local subdomain n. */ \
1538:     *n = 0; \
1539:     if (*ylow_loc < *yhigh_loc) { \
1540:       PetscInt width = xright - xleft; \
1541:       *n += width * (*yhigh_loc - *ylow_loc - 1); \
1542:       *n += PetscMin(PetscMax(*xright_loc - xleft, 0), width); \
1543:       *n -= PetscMin(PetscMax(*xleft_loc - xleft, 0), width); \
1544:     } \
1545:   }
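
/*
   A worked instance of the macro above (the numbers are chosen purely for illustration):
   on a 4x4 grid (M = N = 4), take a subdomain with xleft = 1, xright = 3, ylow = 1, yhigh = 3
   and a local ownership range [first,last) = [4,12), i.e. grid rows 1 and 2.  Then

     first_row  = 4/4 = 1,  last_row = 12/4 + 1 = 4
     ylow_loc   = max(1,1)   = 1
     xleft_loc  = max(4%4,1) = 1      (since ylow_loc == first_row)
     yhigh_loc  = min(4,3)   = 3
     xright_loc = 3                   (since yhigh_loc != last_row)
     n          = 2*(3-1-1) + min(max(3-1,0),2) - min(max(1-1,0),2) = 2 + 2 - 0 = 4

   which is the 2x2 patch of grid points 1 <= x < 3, 1 <= y < 3, all of them locally owned.
*/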

1547: /*@
1548:    PCGASMCreateSubdomains2D - Creates the index sets for the `PCGASM` overlapping Schwarz
1549:    preconditioner for a two-dimensional problem on a regular grid.

1551:    Collective

1553:    Input Parameters:
1554: +  pc       - the preconditioner context
1555: .  M        - the global number of grid points in the x direction
1556: .  N        - the global number of grid points in the y direction
1557: .  Mdomains - the global number of subdomains in the x direction
1558: .  Ndomains - the global number of subdomains in the y direction
1559: .  dof      - degrees of freedom per node
1560: -  overlap  - overlap in mesh lines

1562:    Output Parameters:
1563: +  nsub - the number of local subdomains created
1564: .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1565: -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains

1567:    Level: advanced
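
   A minimal usage sketch (illustrative values only; the matrix attached to pc is assumed to
   correspond to a 128 x 128 grid with one unknown per grid point):
.vb
   PetscInt nsub;
   IS      *iis, *ois;

   PCGASMCreateSubdomains2D(pc, 128, 128, 4, 4, 1, 2, &nsub, &iis, &ois);
   PCGASMSetSubdomains(pc, nsub, iis, ois);
   PCGASMDestroySubdomains(nsub, &iis, &ois);
.ve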

1569: .seealso: `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMGetSubKSP()`, `PCGASMSetOverlap()`
1570: @*/
1571: PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M, PetscInt N, PetscInt Mdomains, PetscInt Ndomains, PetscInt dof, PetscInt overlap, PetscInt *nsub, IS **iis, IS **ois)
1572: {
1573:   PetscMPIInt size, rank;
1574:   PetscInt    i, j;
1575:   PetscInt    maxheight, maxwidth;
1576:   PetscInt    xstart, xleft, xright, xleft_loc, xright_loc;
1577:   PetscInt    ystart, ylow, yhigh, ylow_loc, yhigh_loc;
1578:   PetscInt    x[2][2], y[2][2], n[2];
1579:   PetscInt    first, last;
1580:   PetscInt    nidx, *idx;
1581:   PetscInt    ii, jj, s, q, d;
1582:   PetscInt    k, kk;
1583:   PetscMPIInt color;
1584:   MPI_Comm    comm, subcomm;
1585:   IS        **xis = NULL, **is = ois, **is_local = iis;

1587:   PetscObjectGetComm((PetscObject)pc, &comm);
1588:   MPI_Comm_size(comm, &size);
1589:   MPI_Comm_rank(comm, &rank);
1590:   MatGetOwnershipRange(pc->pmat, &first, &last);
1591:   PetscCheck((first % dof) == 0 && (last % dof) == 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG,
1592:              "Matrix row partitioning unsuitable for domain decomposition: local row range (%" PetscInt_FMT ",%" PetscInt_FMT ") "
1593:              "does not respect the number of degrees of freedom per grid point %" PetscInt_FMT,
1594:              first, last, dof);

1596:   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1597:   s      = 0;
1598:   ystart = 0;
1599:   for (j = 0; j < Ndomains; ++j) {
1600:     maxheight = N / Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1602:     /* Vertical domain limits with an overlap. */
1603:     ylow   = PetscMax(ystart - overlap, 0);
1604:     yhigh  = PetscMin(ystart + maxheight + overlap, N);
1605:     xstart = 0;
1606:     for (i = 0; i < Mdomains; ++i) {
1607:       maxwidth = M / Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1609:       /* Horizontal domain limits with an overlap. */
1610:       xleft  = PetscMax(xstart - overlap, 0);
1611:       xright = PetscMin(xstart + maxwidth + overlap, M);
1612:       /*
1613:          Determine whether this subdomain intersects this rank's ownership range of pc->pmat.
1614:       */
1615:       PCGASMLocalSubdomainBounds2D(M, N, xleft, ylow, xright, yhigh, first, last, (&xleft_loc), (&ylow_loc), (&xright_loc), (&yhigh_loc), (&nidx));
1616:       if (nidx) ++s;
1617:       xstart += maxwidth;
1618:     } /* for (i = 0; i < Mdomains; ++i) */
1619:     ystart += maxheight;
1620:   } /* for (j = 0; j < Ndomains; ++j) */

1622:   /* Now we can allocate the necessary number of ISs. */
1623:   *nsub = s;
1624:   PetscMalloc1(*nsub, is);
1625:   PetscMalloc1(*nsub, is_local);
1626:   s      = 0;
1627:   ystart = 0;
1628:   for (j = 0; j < Ndomains; ++j) {
1629:     maxheight = N / Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1631:     /* Vertical domain limits with an overlap. */
1632:     y[0][0] = PetscMax(ystart - overlap, 0);
1633:     y[0][1] = PetscMin(ystart + maxheight + overlap, N);
1634:     /* Vertical domain limits without an overlap. */
1635:     y[1][0] = ystart;
1636:     y[1][1] = PetscMin(ystart + maxheight, N);
1637:     xstart  = 0;
1638:     for (i = 0; i < Mdomains; ++i) {
1639:       maxwidth = M / Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1641:       /* Horizontal domain limits with an overlap. */
1642:       x[0][0] = PetscMax(xstart - overlap, 0);
1643:       x[0][1] = PetscMin(xstart + maxwidth + overlap, M);
1644:       /* Horizontal domain limits without an overlap. */
1645:       x[1][0] = xstart;
1646:       x[1][1] = PetscMin(xstart + maxwidth, M);
1647:       /*
1648:          Determine whether this domain intersects this rank's ownership range of pc->pmat.
1649:          Do this twice: first for the domains with overlaps, and once without.
1650:          During the first pass create the subcommunicators, and use them on the second pass as well.
1651:       */
1652:       for (q = 0; q < 2; ++q) {
1653:         PetscBool split = PETSC_FALSE;
1654:         /*
1655:           domain limits, (xleft, xright) and (ylow, yhigh), are adjusted
1656:           according to whether the domain with an overlap or without is considered.
1657:         */
1658:         xleft  = x[q][0];
1659:         xright = x[q][1];
1660:         ylow   = y[q][0];
1661:         yhigh  = y[q][1];
1662:         PCGASMLocalSubdomainBounds2D(M, N, xleft, ylow, xright, yhigh, first, last, (&xleft_loc), (&ylow_loc), (&xright_loc), (&yhigh_loc), (&nidx));
1663:         nidx *= dof;
1664:         n[q] = nidx;
1665:         /*
1666:          Based on the counted number of indices in the local domain *with an overlap*,
1667:          construct a subcommunicator of all the MPI ranks supporting this domain.
1668:          Observe that a domain with an overlap might have nontrivial local support,
1669:          while the domain without an overlap might not.  Hence, the decision to participate
1670:          in the subcommunicator must be based on the domain with an overlap.
1671:          */
1672:         if (q == 0) {
1673:           if (nidx) color = 1;
1674:           else color = MPI_UNDEFINED;
1675:           MPI_Comm_split(comm, color, rank, &subcomm);
1676:           split = PETSC_TRUE;
1677:         }
1678:         /*
1679:          Proceed only if the number of local indices *with an overlap* is nonzero.
1680:          */
1681:         if (n[0]) {
1682:           if (q == 0) xis = is;
1683:           if (q == 1) {
1684:             /*
1685:              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1686:              Moreover, if the overlap is zero, the two ISs are identical.
1687:              */
1688:             if (overlap == 0) {
1689:               (*is_local)[s] = (*is)[s];
1690:               PetscObjectReference((PetscObject)(*is)[s]);
1691:               continue;
1692:             } else {
1693:               xis     = is_local;
1694:               subcomm = ((PetscObject)(*is)[s])->comm;
1695:             }
1696:           } /* if (q == 1) */
1697:           idx = NULL;
1698:           PetscMalloc1(nidx, &idx);
1699:           if (nidx) {
1700:             k = 0;
1701:             for (jj = ylow_loc; jj < yhigh_loc; ++jj) {
1702:               PetscInt x0 = (jj == ylow_loc) ? xleft_loc : xleft;
1703:               PetscInt x1 = (jj == yhigh_loc - 1) ? xright_loc : xright;
1704:               kk          = dof * (M * jj + x0);
1705:               for (ii = x0; ii < x1; ++ii) {
1706:                 for (d = 0; d < dof; ++d) idx[k++] = kk++;
1707:               }
1708:             }
1709:           }
1710:           ISCreateGeneral(subcomm, nidx, idx, PETSC_OWN_POINTER, (*xis) + s);
1711:           if (split) MPI_Comm_free(&subcomm);
1712:         } /* if (n[0]) */
1713:       }   /* for (q = 0; q < 2; ++q) */
1714:       if (n[0]) ++s;
1715:       xstart += maxwidth;
1716:     } /* for (i = 0; i < Mdomains; ++i) */
1717:     ystart += maxheight;
1718:   } /* for (j = 0; j < Ndomains; ++j) */
1719:   return 0;
1720: }

1722: /*@C
1723:     PCGASMGetSubdomains - Gets the subdomains supported on this MPI rank
1724:     for the `PCGASM` additive Schwarz preconditioner.

1726:     Not Collective

1728:     Input Parameter:
1729: .   pc - the preconditioner context

1731:     Output Parameters:
1732: +   n   - the number of subdomains for this MPI rank (default value = 1)
1733: .   iis - the index sets that define the inner subdomains (without overlap) supported on this rank (can be NULL)
1734: -   ois - the index sets that define the outer subdomains (with overlap) supported on this rank (can be NULL)

1736:     Note:
1737:     The user is responsible for destroying the `IS`s and freeing the returned arrays.
1738:     The `IS` numbering is in the parallel, global numbering of the vector.
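
    A minimal sketch (illustrative only; per the Note above the caller owns both arrays, so the
    cleanup is shown here with `PCGASMDestroySubdomains()`, which destroys each `IS` and frees
    the arrays):
.vb
    PetscInt n;
    IS      *iis, *ois;

    PCGASMGetSubdomains(pc, &n, &iis, &ois);
    /* ... examine the subdomain index sets ... */
    PCGASMDestroySubdomains(n, &iis, &ois);
.ve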

1740:     Level: advanced

1742: .seealso: `PCGASM`, `PCGASMSetOverlap()`, `PCGASMGetSubKSP()`, `PCGASMCreateSubdomains2D()`,
1743:           `PCGASMSetSubdomains()`, `PCGASMGetSubmatrices()`
1744: @*/
1745: PetscErrorCode PCGASMGetSubdomains(PC pc, PetscInt *n, IS *iis[], IS *ois[])
1746: {
1747:   PC_GASM  *osm;
1748:   PetscBool match;
1749:   PetscInt  i;

1752:   PetscObjectTypeCompare((PetscObject)pc, PCGASM, &match);
1754:   osm = (PC_GASM *)pc->data;
1755:   if (n) *n = osm->n;
1756:   if (iis) PetscMalloc1(osm->n, iis);
1757:   if (ois) PetscMalloc1(osm->n, ois);
1758:   if (iis || ois) {
1759:     for (i = 0; i < osm->n; ++i) {
1760:       if (iis) (*iis)[i] = osm->iis[i];
1761:       if (ois) (*ois)[i] = osm->ois[i];
1762:     }
1763:   }
1764:   return 0;
1765: }

1767: /*@C
1768:     PCGASMGetSubmatrices - Gets the local submatrices (for this MPI rank
1769:     only) for the `PCGASM` additive Schwarz preconditioner.

1771:     Not Collective

1773:     Input Parameter:
1774: .   pc - the preconditioner context

1776:     Output Parameters:
1777: +   n   - the number of matrices for this MPI rank (default value = 1)
1778: -   mat - the matrices

1780:     Note:
1781:     Matrices returned by this routine have the same communicators as the index sets (IS)
1782:            used to define subdomains in `PCGASMSetSubdomains()`
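
    A minimal sketch (assuming `PCSetUp()` has already been called so the submatrices exist;
    the returned array points at the preconditioner's internal array, which the implementation
    below returns directly, so it is not freed here):
.vb
    PetscInt n, m, nn;
    Mat     *submat;

    PCSetUp(pc);
    PCGASMGetSubmatrices(pc, &n, &submat);
    if (n > 0) MatGetSize(submat[0], &m, &nn);   /* query the first local submatrix */
.ve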

1784:     Level: advanced

1786: .seealso: `PCGASM`, `PCGASMSetOverlap()`, `PCGASMGetSubKSP()`,
1787:           `PCGASMCreateSubdomains2D()`, `PCGASMSetSubdomains()`, `PCGASMGetSubdomains()`
1788: @*/
1789: PetscErrorCode PCGASMGetSubmatrices(PC pc, PetscInt *n, Mat *mat[])
1790: {
1791:   PC_GASM  *osm;
1792:   PetscBool match;

1798:   PetscObjectTypeCompare((PetscObject)pc, PCGASM, &match);
1800:   osm = (PC_GASM *)pc->data;
1801:   if (n) *n = osm->n;
1802:   if (mat) *mat = osm->pmat;
1803:   return 0;
1804: }

1806: /*@
1807:     PCGASMSetUseDMSubdomains - Indicates whether to use `DMCreateDomainDecomposition()` to define the subdomains, whenever possible for `PCGASM`

1809:     Logically Collective

1811:     Input Parameters:
1812: +   pc  - the preconditioner
1813: -   flg - boolean indicating whether to use subdomains defined by the `DM`

1815:     Options Database Key:
1816: .   -pc_gasm_dm_subdomains <bool> - use subdomains defined by the `DM` whenever possible

1818:     Level: intermediate

1820:     Note:
1821:     `PCGASMSetSubdomains()`, `PCGASMSetTotalSubdomains()` or `PCGASMSetOverlap()` take precedence over `PCGASMSetUseDMSubdomains()`,
1822:     so setting `PCGASMSetSubdomains()` with nontrivial subdomain ISs or any of `PCGASMSetTotalSubdomains()` and `PCGASMSetOverlap()`
1823:     automatically turns off the use of subdomains defined by the `DM`.
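
    A minimal sketch (assuming a `DM`, here called dm, that supports `DMCreateDomainDecomposition()`
    has already been created):
.vb
    PCSetType(pc, PCGASM);
    PCSetDM(pc, dm);
    PCGASMSetUseDMSubdomains(pc, PETSC_TRUE);
.ve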

1825: .seealso: `PCGASM`, `PCGASMGetUseDMSubdomains()`, `PCGASMSetSubdomains()`, `PCGASMSetOverlap()`,
1826:           `PCGASMCreateSubdomains2D()`
1827: @*/
1828: PetscErrorCode PCGASMSetUseDMSubdomains(PC pc, PetscBool flg)
1829: {
1830:   PC_GASM  *osm = (PC_GASM *)pc->data;
1831:   PetscBool match;

1836:   PetscObjectTypeCompare((PetscObject)pc, PCGASM, &match);
1837:   if (match) {
1838:     if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) osm->dm_subdomains = flg;
1839:   }
1840:   return 0;
1841: }

1843: /*@
1844:     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use `DMCreateDomainDecomposition()` to define the subdomains, whenever possible with `PCGASM`

1846:     Not Collective

1848:     Input Parameter:
1849: .   pc  - the preconditioner

1851:     Output Parameter:
1852: .   flg - boolean indicating whether to use subdomains defined by the `DM`

1854:     Level: intermediate

1856: .seealso: `PCGASM`, `PCGASMSetUseDMSubdomains()`, `PCGASMSetOverlap()`,
1857:           `PCGASMCreateSubdomains2D()`
1858: @*/
1859: PetscErrorCode PCGASMGetUseDMSubdomains(PC pc, PetscBool *flg)
1860: {
1861:   PC_GASM  *osm = (PC_GASM *)pc->data;
1862:   PetscBool match;

1866:   PetscObjectTypeCompare((PetscObject)pc, PCGASM, &match);
1867:   if (match) {
1868:     if (flg) *flg = osm->dm_subdomains;
1869:   }
1870:   return 0;
1871: }