Actual source code: bjacobi.c


  2: /*
  3:    Defines a block Jacobi preconditioner.
  4: */

  6: #include <../src/ksp/pc/impls/bjacobi/bjacobi.h>

  8: static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC, Mat, Mat);
  9: static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC, Mat, Mat);
 10: static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);

 12: static PetscErrorCode PCSetUp_BJacobi(PC pc)
 13: {
 14:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
 15:   Mat         mat = pc->mat, pmat = pc->pmat;
 16:   PetscBool   hasop;
 17:   PetscInt    N, M, start, i, sum, end;
 18:   PetscInt    bs, i_start = -1, i_end = -1;
 19:   PetscMPIInt rank, size;

 21:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
 22:   MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
 23:   MatGetLocalSize(pc->pmat, &M, &N);
 24:   MatGetBlockSize(pc->pmat, &bs);

 26:   if (jac->n > 0 && jac->n < size) {
 27:     PCSetUp_BJacobi_Multiproc(pc);
 28:     return 0;
 29:   }

 31:   /* Determine the number of blocks assigned to each processor */
 32:   /* local block count given */
 33:   if (jac->n_local > 0 && jac->n < 0) {
 34:     MPIU_Allreduce(&jac->n_local, &jac->n, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));
 35:     if (jac->l_lens) { /* check that user set these correctly */
 36:       sum = 0;
 37:       for (i = 0; i < jac->n_local; i++) {
 39:         sum += jac->l_lens[i];
 40:       }
 42:     } else {
 43:       PetscMalloc1(jac->n_local, &jac->l_lens);
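           /* default split: distribute the M/bs point blocks as evenly as possible over the n_local
              blocks; each of the first (M/bs) % n_local blocks gets one extra point block, and the
              lengths are scaled back to row counts by the factor bs */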
 44:       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
 45:     }
 46:   } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
 47:     /* global blocks given: determine which ones are local */
 48:     if (jac->g_lens) {
 49:       /* check that g_lens has valid entries */
 50:       for (i = 0; i < jac->n; i++) {
 53:       }
 54:       if (size == 1) {
 55:         jac->n_local = jac->n;
 56:         PetscMalloc1(jac->n_local, &jac->l_lens);
 57:         PetscArraycpy(jac->l_lens, jac->g_lens, jac->n_local);
 58:         /* check that user set these correctly */
 59:         sum = 0;
 60:         for (i = 0; i < jac->n_local; i++) sum += jac->l_lens[i];
 62:       } else {
 63:         MatGetOwnershipRange(pc->pmat, &start, &end);
 64:         /* loop over blocks determining the first one owned by me */
 65:         sum = 0;
 66:         for (i = 0; i < jac->n + 1; i++) {
 67:           if (sum == start) {
 68:             i_start = i;
 69:             goto start_1;
 70:           }
 71:           if (i < jac->n) sum += jac->g_lens[i];
 72:         }
 73:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
 74:       start_1:
 75:         for (i = i_start; i < jac->n + 1; i++) {
 76:           if (sum == end) {
 77:             i_end = i;
 78:             goto end_1;
 79:           }
 80:           if (i < jac->n) sum += jac->g_lens[i];
 81:         }
 82:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
 83:       end_1:
 84:         jac->n_local = i_end - i_start;
 85:         PetscMalloc1(jac->n_local, &jac->l_lens);
 86:         PetscArraycpy(jac->l_lens, jac->g_lens + i_start, jac->n_local);
 87:       }
 88:     } else { /* no global blocks given, determine them using the default layout */
 89:       jac->n_local = jac->n / size + ((jac->n % size) > rank);
 90:       PetscMalloc1(jac->n_local, &jac->l_lens);
 91:       for (i = 0; i < jac->n_local; i++) {
 92:         jac->l_lens[i] = ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i)) * bs;
 94:       }
 95:     }
 96:   } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
 97:     jac->n       = size;
 98:     jac->n_local = 1;
 99:     PetscMalloc1(1, &jac->l_lens);
100:     jac->l_lens[0] = M;
101:   } else { /* jac->n > 0 && jac->n_local > 0 */
102:     if (!jac->l_lens) {
103:       PetscMalloc1(jac->n_local, &jac->l_lens);
104:       for (i = 0; i < jac->n_local; i++) jac->l_lens[i] = bs * ((M / bs) / jac->n_local + (((M / bs) % jac->n_local) > i));
105:     }
106:   }

109:   /*    Determines mat and pmat */
110:   MatHasOperation(pc->mat, MATOP_GET_DIAGONAL_BLOCK, &hasop);
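       /* when the matrix cannot return a diagonal block and the run is sequential, pc->mat and
          pc->pmat already serve as the single local block; otherwise extract the diagonal blocks */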
111:   if (!hasop && size == 1) {
112:     mat  = pc->mat;
113:     pmat = pc->pmat;
114:   } else {
115:     if (pc->useAmat) {
 116:       /* use the diagonal block of Amat, not Pmat, for the local MatMult() */
117:       MatGetDiagonalBlock(pc->mat, &mat);
118:     }
119:     if (pc->pmat != pc->mat || !pc->useAmat) {
120:       MatGetDiagonalBlock(pc->pmat, &pmat);
121:     } else pmat = mat;
122:   }

124:   /*
125:      Setup code depends on the number of blocks
126:   */
127:   if (jac->n_local == 1) {
128:     PCSetUp_BJacobi_Singleblock(pc, mat, pmat);
129:   } else {
130:     PCSetUp_BJacobi_Multiblock(pc, mat, pmat);
131:   }
132:   return 0;
133: }

 135: /* Default destroy, if it has never been set up */
136: static PetscErrorCode PCDestroy_BJacobi(PC pc)
137: {
138:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;

140:   PetscFree(jac->g_lens);
141:   PetscFree(jac->l_lens);
142:   PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", NULL);
143:   PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", NULL);
144:   PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", NULL);
145:   PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", NULL);
146:   PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", NULL);
147:   PetscFree(pc->data);
148:   return 0;
149: }

151: static PetscErrorCode PCSetFromOptions_BJacobi(PC pc, PetscOptionItems *PetscOptionsObject)
152: {
153:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
154:   PetscInt    blocks, i;
155:   PetscBool   flg;

157:   PetscOptionsHeadBegin(PetscOptionsObject, "Block Jacobi options");
158:   PetscOptionsInt("-pc_bjacobi_blocks", "Total number of blocks", "PCBJacobiSetTotalBlocks", jac->n, &blocks, &flg);
159:   if (flg) PCBJacobiSetTotalBlocks(pc, blocks, NULL);
160:   PetscOptionsInt("-pc_bjacobi_local_blocks", "Local number of blocks", "PCBJacobiSetLocalBlocks", jac->n_local, &blocks, &flg);
161:   if (flg) PCBJacobiSetLocalBlocks(pc, blocks, NULL);
162:   if (jac->ksp) {
 163:     /* The sub-KSPs have already been set up (e.g., by PCSetUp_BJacobi_Singleblock()), but KSPSetFromOptions() was
 164:      * not called on them unless this routine had already been called. */
165:     for (i = 0; i < jac->n_local; i++) KSPSetFromOptions(jac->ksp[i]);
166:   }
167:   PetscOptionsHeadEnd();
168:   return 0;
169: }

171: #include <petscdraw.h>
172: static PetscErrorCode PCView_BJacobi(PC pc, PetscViewer viewer)
173: {
174:   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
175:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
176:   PetscMPIInt           rank;
177:   PetscInt              i;
178:   PetscBool             iascii, isstring, isdraw;
179:   PetscViewer           sviewer;
180:   PetscViewerFormat     format;
181:   const char           *prefix;

183:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
184:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring);
185:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
186:   if (iascii) {
187:     if (pc->useAmat) PetscViewerASCIIPrintf(viewer, "  using Amat local matrix, number of blocks = %" PetscInt_FMT "\n", jac->n);
188:     PetscViewerASCIIPrintf(viewer, "  number of blocks = %" PetscInt_FMT "\n", jac->n);
189:     MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
190:     PetscViewerGetFormat(viewer, &format);
191:     if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
192:       PetscViewerASCIIPrintf(viewer, "  Local solver information for first block is in the following KSP and PC objects on rank 0:\n");
193:       PCGetOptionsPrefix(pc, &prefix);
194:       PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : "");
195:       if (jac->ksp && !jac->psubcomm) {
196:         PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
197:         if (rank == 0) {
198:           PetscViewerASCIIPushTab(viewer);
199:           KSPView(jac->ksp[0], sviewer);
200:           PetscViewerASCIIPopTab(viewer);
201:         }
202:         PetscViewerFlush(sviewer);
203:         PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
204:         PetscViewerFlush(viewer);
205:         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
206:         PetscViewerASCIIPopSynchronized(viewer);
207:       } else if (mpjac && jac->ksp && mpjac->psubcomm) {
208:         PetscViewerGetSubViewer(viewer, mpjac->psubcomm->child, &sviewer);
209:         if (!mpjac->psubcomm->color) {
210:           PetscViewerASCIIPushTab(viewer);
211:           KSPView(*(jac->ksp), sviewer);
212:           PetscViewerASCIIPopTab(viewer);
213:         }
214:         PetscViewerFlush(sviewer);
215:         PetscViewerRestoreSubViewer(viewer, mpjac->psubcomm->child, &sviewer);
216:         PetscViewerFlush(viewer);
217:         /*  extra call needed because of the two calls to PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
218:         PetscViewerASCIIPopSynchronized(viewer);
219:       } else {
220:         PetscViewerFlush(viewer);
221:       }
222:     } else {
223:       PetscInt n_global;
224:       MPIU_Allreduce(&jac->n_local, &n_global, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc));
225:       PetscViewerASCIIPushSynchronized(viewer);
226:       PetscViewerASCIIPrintf(viewer, "  Local solver information for each block is in the following KSP and PC objects:\n");
227:       PetscViewerASCIISynchronizedPrintf(viewer, "[%d] number of local blocks = %" PetscInt_FMT ", first local block number = %" PetscInt_FMT "\n", rank, jac->n_local, jac->first_local);
228:       PetscViewerASCIIPushTab(viewer);
229:       PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
230:       for (i = 0; i < jac->n_local; i++) {
231:         PetscViewerASCIISynchronizedPrintf(viewer, "[%d] local block number %" PetscInt_FMT "\n", rank, i);
232:         KSPView(jac->ksp[i], sviewer);
233:         PetscViewerASCIISynchronizedPrintf(viewer, "- - - - - - - - - - - - - - - - - -\n");
234:       }
235:       PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
236:       PetscViewerASCIIPopTab(viewer);
237:       PetscViewerFlush(viewer);
238:       PetscViewerASCIIPopSynchronized(viewer);
239:     }
240:   } else if (isstring) {
241:     PetscViewerStringSPrintf(viewer, " blks=%" PetscInt_FMT, jac->n);
242:     PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
243:     if (jac->ksp) KSPView(jac->ksp[0], sviewer);
244:     PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
245:   } else if (isdraw) {
246:     PetscDraw draw;
247:     char      str[25];
248:     PetscReal x, y, bottom, h;

250:     PetscViewerDrawGetDraw(viewer, 0, &draw);
251:     PetscDrawGetCurrentPoint(draw, &x, &y);
252:     PetscSNPrintf(str, 25, "Number blocks %" PetscInt_FMT, jac->n);
253:     PetscDrawStringBoxed(draw, x, y, PETSC_DRAW_RED, PETSC_DRAW_BLACK, str, NULL, &h);
254:     bottom = y - h;
255:     PetscDrawPushCurrentPoint(draw, x, bottom);
 256:     /* warning: the communicator on the viewer is different than the one on the ksp in parallel */
257:     if (jac->ksp) KSPView(jac->ksp[0], viewer);
258:     PetscDrawPopCurrentPoint(draw);
259:   }
260:   return 0;
261: }

263: static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp)
264: {
265:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;


269:   if (n_local) *n_local = jac->n_local;
270:   if (first_local) *first_local = jac->first_local;
271:   if (ksp) *ksp = jac->ksp;
272:   return 0;
273: }

275: static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc, PetscInt blocks, PetscInt *lens)
276: {
277:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;

280:   jac->n = blocks;
281:   if (!lens) jac->g_lens = NULL;
282:   else {
283:     PetscMalloc1(blocks, &jac->g_lens);
284:     PetscArraycpy(jac->g_lens, lens, blocks);
285:   }
286:   return 0;
287: }

289: static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
290: {
291:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;

293:   *blocks = jac->n;
294:   if (lens) *lens = jac->g_lens;
295:   return 0;
296: }

298: static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc, PetscInt blocks, const PetscInt lens[])
299: {
300:   PC_BJacobi *jac;

302:   jac = (PC_BJacobi *)pc->data;

304:   jac->n_local = blocks;
305:   if (!lens) jac->l_lens = NULL;
306:   else {
307:     PetscMalloc1(blocks, &jac->l_lens);
308:     PetscArraycpy(jac->l_lens, lens, blocks);
309:   }
310:   return 0;
311: }

313: static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
314: {
315:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;

317:   *blocks = jac->n_local;
318:   if (lens) *lens = jac->l_lens;
319:   return 0;
320: }

322: /*@C
323:    PCBJacobiGetSubKSP - Gets the local `KSP` contexts for all blocks on
324:    this processor.

326:    Not Collective

328:    Input Parameter:
329: .  pc - the preconditioner context

331:    Output Parameters:
332: +  n_local - the number of blocks on this processor, or NULL
333: .  first_local - the global number of the first block on this processor, or NULL
334: -  ksp - the array of KSP contexts

336:    Notes:
337:    After `PCBJacobiGetSubKSP()` the array of `KSP` contexts is not to be freed.

339:    Currently for some matrix implementations only 1 block per processor
340:    is supported.

342:    You must call `KSPSetUp()` or `PCSetUp()` before calling `PCBJacobiGetSubKSP()`.

344:    Fortran Usage:
345:    You must pass in a `KSP` array that is large enough to contain all the local `KSP`s.

347:    You can call `PCBJacobiGetSubKSP`(pc,nlocal,firstlocal,`PETSC_NULL_KSP`,ierr) to determine how large the
348:    `KSP` array must be.

350:    Level: advanced

352: .seealso: `PCBJACOBI`, `PCASM`, `PCASMGetSubKSP()`
353: @*/
354: PetscErrorCode PCBJacobiGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
355: {
357:   PetscUseMethod(pc, "PCBJacobiGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
358:   return 0;
359: }
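
/*
   Illustrative usage sketch, not part of the PETSc source: configure each local block solver
   individually after setup via PCBJacobiGetSubKSP(). The outer KSP `ksp` is an assumption of the
   example, and error checking is omitted to match the listing above.

     KSP      *subksp;
     PC        pc, subpc;
     PetscInt  nlocal, first, i;

     KSPGetPC(ksp, &pc);
     PCSetType(pc, PCBJACOBI);
     KSPSetUp(ksp);                   // must precede PCBJacobiGetSubKSP()
     PCBJacobiGetSubKSP(pc, &nlocal, &first, &subksp);
     for (i = 0; i < nlocal; i++) {
       KSPGetPC(subksp[i], &subpc);
       PCSetType(subpc, PCILU);       // e.g., ILU on every local block
     }
*/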

361: /*@
362:    PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
363:    Jacobi preconditioner.

365:    Collective

367:    Input Parameters:
368: +  pc - the preconditioner context
369: .  blocks - the number of blocks
370: -  lens - [optional] integer array containing the size of each block

372:    Options Database Key:
373: .  -pc_bjacobi_blocks <blocks> - Sets the number of global blocks

375:    Note:
376:    Currently only a limited number of blocking configurations are supported.
377:    All processors sharing the `PC` must call this routine with the same data.

379:    Level: intermediate

381: .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetLocalBlocks()`
382: @*/
383: PetscErrorCode PCBJacobiSetTotalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
384: {
387:   PetscTryMethod(pc, "PCBJacobiSetTotalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
388:   return 0;
389: }
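
/*
   Illustrative sketch, not part of the PETSc source: one way the total block layout might be set
   before PCSetUp(). The PC `pc`, the block count, and the lengths are assumptions of the example;
   the lengths must sum to the global number of rows (a 40x40 system here). Passing NULL for lens
   lets PETSc choose the block sizes.

     PetscInt lens[3] = {10, 20, 10};

     PCSetType(pc, PCBJACOBI);
     PCBJacobiSetTotalBlocks(pc, 3, lens);
*/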

391: /*@C
392:    PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
393:    Jacobi, `PCBJACOBI`, preconditioner.

395:    Not Collective

397:    Input Parameter:
398: .  pc - the preconditioner context

 400:    Output Parameters:
401: +  blocks - the number of blocks
402: -  lens - integer array containing the size of each block

404:    Level: intermediate

406: .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetLocalBlocks()`
407: @*/
408: PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
409: {
412:   PetscUseMethod(pc, "PCBJacobiGetTotalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
413:   return 0;
414: }

416: /*@
417:    PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
418:    Jacobi, `PCBJACOBI`,  preconditioner.

420:    Not Collective

422:    Input Parameters:
423: +  pc - the preconditioner context
424: .  blocks - the number of blocks
425: -  lens - [optional] integer array containing size of each block

427:    Options Database Key:
428: .  -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks

430:    Note:
431:    Currently only a limited number of blocking configurations are supported.

433:    Level: intermediate

435: .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiSetTotalBlocks()`
436: @*/
437: PetscErrorCode PCBJacobiSetLocalBlocks(PC pc, PetscInt blocks, const PetscInt lens[])
438: {
441:   PetscTryMethod(pc, "PCBJacobiSetLocalBlocks_C", (PC, PetscInt, const PetscInt[]), (pc, blocks, lens));
442:   return 0;
443: }
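
/*
   Illustrative sketch, not part of the PETSc source: each rank requests two local blocks that
   together cover its local rows. The PC `pc` and `mloc` (the number of local rows on this rank)
   are assumptions of the example.

     PetscInt llens[2];

     llens[0] = mloc / 2;
     llens[1] = mloc - llens[0];
     PCBJacobiSetLocalBlocks(pc, 2, llens);
*/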

445: /*@C
446:    PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
447:    Jacobi, `PCBJACOBI`, preconditioner.

449:    Not Collective

 451:    Input Parameter:
 452: .  pc - the preconditioner context

         Output Parameters:
 453: +  blocks - the number of blocks
 454: -  lens - [optional] integer array containing the size of each block

456:    Note:
457:    Currently only a limited number of blocking configurations are supported.

459:    Level: intermediate

461: .seealso: `PCBJACOBI`, `PCSetUseAmat()`, `PCBJacobiGetTotalBlocks()`
462: @*/
463: PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
464: {
467:   PetscUseMethod(pc, "PCBJacobiGetLocalBlocks_C", (PC, PetscInt *, const PetscInt *[]), (pc, blocks, lens));
468:   return 0;
469: }

471: /*MC
 472:    PCBJACOBI - Use block Jacobi preconditioning, in which each block is (approximately) solved with
 473:            its own `KSP` object.

475:    Options Database Keys:
476: +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
477: -  -pc_bjacobi_blocks <n> - use n total blocks

479:    Notes:
480:     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable point block, and `PCPBJACOBI` for fixed size point block

482:     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.

 484:      To set options on the solvers for each block, prepend sub_ to all the `KSP` and `PC`
 485:         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

 487:      To set the options on the solvers separately for each block, call `PCBJacobiGetSubKSP()`
 488:          and set the options directly on the resulting `KSP` objects (you can access their `PC`
 489:          with `KSPGetPC()`)

 491:      For GPU-based vectors (`VECCUDA`, `VECViennaCL`) it is recommended to use exactly one block per MPI process for best
 492:          performance.  A different block partitioning may lead to additional data transfers
 493:          between host and GPU that degrade performance.

 495:      When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.

497:    Level: beginner

499: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCType`,
500:           `PCASM`, `PCSetUseAmat()`, `PCGetUseAmat()`, `PCBJacobiGetSubKSP()`, `PCBJacobiSetTotalBlocks()`,
501:           `PCBJacobiSetLocalBlocks()`, `PCSetModifySubMatrices()`, `PCJACOBI`, `PCVPBJACOBI`, `PCPBJACOBI`
502: M*/
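
/*
   Illustrative sketch, not part of the PETSc source: select block Jacobi programmatically and let
   the block solvers be configured from the options database (e.g. with
   -sub_ksp_type preonly -sub_pc_type ilu -sub_pc_factor_levels 1). The outer KSP `ksp` is an
   assumption of the example, and error checking is omitted to match the listing above.

     PC pc;

     KSPGetPC(ksp, &pc);
     PCSetType(pc, PCBJACOBI);
     KSPSetFromOptions(ksp);    // picks up the -sub_ prefixed options for every block solver
*/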

504: PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
505: {
506:   PetscMPIInt rank;
507:   PC_BJacobi *jac;

509:   PetscNew(&jac);
510:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);

512:   pc->ops->apply           = NULL;
513:   pc->ops->matapply        = NULL;
514:   pc->ops->applytranspose  = NULL;
515:   pc->ops->setup           = PCSetUp_BJacobi;
516:   pc->ops->destroy         = PCDestroy_BJacobi;
517:   pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
518:   pc->ops->view            = PCView_BJacobi;
519:   pc->ops->applyrichardson = NULL;

521:   pc->data         = (void *)jac;
522:   jac->n           = -1;
523:   jac->n_local     = -1;
524:   jac->first_local = rank;
525:   jac->ksp         = NULL;
526:   jac->g_lens      = NULL;
527:   jac->l_lens      = NULL;
528:   jac->psubcomm    = NULL;

530:   PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetSubKSP_C", PCBJacobiGetSubKSP_BJacobi);
531:   PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetTotalBlocks_C", PCBJacobiSetTotalBlocks_BJacobi);
532:   PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetTotalBlocks_C", PCBJacobiGetTotalBlocks_BJacobi);
533:   PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiSetLocalBlocks_C", PCBJacobiSetLocalBlocks_BJacobi);
534:   PetscObjectComposeFunction((PetscObject)pc, "PCBJacobiGetLocalBlocks_C", PCBJacobiGetLocalBlocks_BJacobi);
535:   return 0;
536: }

538: /*
 539:         These are for a single block per processor; they work for AIJ and BAIJ, both Seq and MPI
540: */
541: static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
542: {
543:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
544:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

546:   KSPReset(jac->ksp[0]);
547:   VecDestroy(&bjac->x);
548:   VecDestroy(&bjac->y);
549:   return 0;
550: }

552: static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
553: {
554:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
555:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

557:   PCReset_BJacobi_Singleblock(pc);
558:   KSPDestroy(&jac->ksp[0]);
559:   PetscFree(jac->ksp);
560:   PetscFree(bjac);
561:   PCDestroy_BJacobi(pc);
562:   return 0;
563: }

565: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
566: {
567:   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
568:   KSP                subksp = jac->ksp[0];
569:   KSPConvergedReason reason;

571:   KSPSetUp(subksp);
572:   KSPGetConvergedReason(subksp, &reason);
573:   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
574:   return 0;
575: }

577: static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc, Vec x, Vec y)
578: {
579:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
580:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;

582:   VecGetLocalVectorRead(x, bjac->x);
583:   VecGetLocalVector(y, bjac->y);
 584:   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
 585:      matrix may change even if the outer KSP/PC has not updated the preconditioner. This would trigger a rebuild
 586:      of the inner preconditioner automatically unless we pass down the outer preconditioner's reuse flag. */
587:   KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner);
588:   KSPSolve(jac->ksp[0], bjac->x, bjac->y);
589:   KSPCheckSolve(jac->ksp[0], pc, bjac->y);
590:   VecRestoreLocalVectorRead(x, bjac->x);
591:   VecRestoreLocalVector(y, bjac->y);
592:   return 0;
593: }

595: static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc, Mat X, Mat Y)
596: {
597:   PC_BJacobi *jac = (PC_BJacobi *)pc->data;
598:   Mat         sX, sY;

 600:   /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix, the inner
 601:      matrix may change even if the outer KSP/PC has not updated the preconditioner. This would trigger a rebuild
 602:      of the inner preconditioner automatically unless we pass down the outer preconditioner's reuse flag. */
603:   KSPSetReusePreconditioner(jac->ksp[0], pc->reusepreconditioner);
604:   MatDenseGetLocalMatrix(X, &sX);
605:   MatDenseGetLocalMatrix(Y, &sY);
606:   KSPMatSolve(jac->ksp[0], sX, sY);
607:   return 0;
608: }

610: static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc, Vec x, Vec y)
611: {
612:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
613:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
614:   PetscScalar            *y_array;
615:   const PetscScalar      *x_array;
616:   PC                      subpc;

 618:   /*
 619:       The VecPlaceArray() calls avoid having to copy the x and y
 620:     vectors into the bjac->x and bjac->y work vectors. The work
 621:     vectors are needed because the sequential solve requires
 622:     sequential vectors.
 623:   */
624:   VecGetArrayRead(x, &x_array);
625:   VecGetArray(y, &y_array);
626:   VecPlaceArray(bjac->x, x_array);
627:   VecPlaceArray(bjac->y, y_array);
628:   /* apply the symmetric left portion of the inner PC operator */
629:   /* note this by-passes the inner KSP and its options completely */
630:   KSPGetPC(jac->ksp[0], &subpc);
631:   PCApplySymmetricLeft(subpc, bjac->x, bjac->y);
632:   VecResetArray(bjac->x);
633:   VecResetArray(bjac->y);
634:   VecRestoreArrayRead(x, &x_array);
635:   VecRestoreArray(y, &y_array);
636:   return 0;
637: }

639: static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc, Vec x, Vec y)
640: {
641:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
642:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
643:   PetscScalar            *y_array;
644:   const PetscScalar      *x_array;
645:   PC                      subpc;

 647:   /*
 648:       The VecPlaceArray() calls avoid having to copy the x and y
 649:     vectors into the bjac->x and bjac->y work vectors. The work
 650:     vectors are needed because the sequential solve requires
 651:     sequential vectors.
 652:   */
653:   VecGetArrayRead(x, &x_array);
654:   VecGetArray(y, &y_array);
655:   VecPlaceArray(bjac->x, x_array);
656:   VecPlaceArray(bjac->y, y_array);

658:   /* apply the symmetric right portion of the inner PC operator */
659:   /* note this by-passes the inner KSP and its options completely */

661:   KSPGetPC(jac->ksp[0], &subpc);
662:   PCApplySymmetricRight(subpc, bjac->x, bjac->y);

664:   VecResetArray(bjac->x);
665:   VecResetArray(bjac->y);
666:   VecRestoreArrayRead(x, &x_array);
667:   VecRestoreArray(y, &y_array);
668:   return 0;
669: }

671: static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc, Vec x, Vec y)
672: {
673:   PC_BJacobi             *jac  = (PC_BJacobi *)pc->data;
674:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock *)jac->data;
675:   PetscScalar            *y_array;
676:   const PetscScalar      *x_array;

 678:   /*
 679:       The VecPlaceArray() calls avoid having to copy the x and y
 680:     vectors into the bjac->x and bjac->y work vectors. The work
 681:     vectors are needed because the sequential solve requires
 682:     sequential vectors.
 683:   */
684:   VecGetArrayRead(x, &x_array);
685:   VecGetArray(y, &y_array);
686:   VecPlaceArray(bjac->x, x_array);
687:   VecPlaceArray(bjac->y, y_array);
688:   KSPSolveTranspose(jac->ksp[0], bjac->x, bjac->y);
689:   KSPCheckSolve(jac->ksp[0], pc, bjac->y);
690:   VecResetArray(bjac->x);
691:   VecResetArray(bjac->y);
692:   VecRestoreArrayRead(x, &x_array);
693:   VecRestoreArray(y, &y_array);
694:   return 0;
695: }

697: static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc, Mat mat, Mat pmat)
698: {
699:   PC_BJacobi             *jac = (PC_BJacobi *)pc->data;
700:   PetscInt                m;
701:   KSP                     ksp;
702:   PC_BJacobi_Singleblock *bjac;
703:   PetscBool               wasSetup = PETSC_TRUE;
704:   VecType                 vectype;
705:   const char             *prefix;

707:   if (!pc->setupcalled) {
708:     if (!jac->ksp) {
709:       wasSetup = PETSC_FALSE;

711:       KSPCreate(PETSC_COMM_SELF, &ksp);
712:       KSPSetErrorIfNotConverged(ksp, pc->erroriffailure);
713:       PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1);
714:       KSPSetType(ksp, KSPPREONLY);
715:       PCGetOptionsPrefix(pc, &prefix);
716:       KSPSetOptionsPrefix(ksp, prefix);
717:       KSPAppendOptionsPrefix(ksp, "sub_");

719:       pc->ops->reset               = PCReset_BJacobi_Singleblock;
720:       pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
721:       pc->ops->apply               = PCApply_BJacobi_Singleblock;
722:       pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
723:       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
724:       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
725:       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
726:       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

728:       PetscMalloc1(1, &jac->ksp);
729:       jac->ksp[0] = ksp;

731:       PetscNew(&bjac);
732:       jac->data = (void *)bjac;
733:     } else {
734:       ksp  = jac->ksp[0];
735:       bjac = (PC_BJacobi_Singleblock *)jac->data;
736:     }

738:     /*
739:       The reason we need to generate these vectors is to serve
740:       as the right-hand side and solution vector for the solve on the
741:       block. We do not need to allocate space for the vectors since
742:       that is provided via VecPlaceArray() just before the call to
743:       KSPSolve() on the block.
744:     */
745:     MatGetSize(pmat, &m, &m);
746:     VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->x);
747:     VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &bjac->y);
748:     MatGetVecType(pmat, &vectype);
749:     VecSetType(bjac->x, vectype);
750:     VecSetType(bjac->y, vectype);
751:   } else {
752:     ksp  = jac->ksp[0];
753:     bjac = (PC_BJacobi_Singleblock *)jac->data;
754:   }
755:   KSPGetOptionsPrefix(ksp, &prefix);
756:   if (pc->useAmat) {
757:     KSPSetOperators(ksp, mat, pmat);
758:     MatSetOptionsPrefix(mat, prefix);
759:   } else {
760:     KSPSetOperators(ksp, pmat, pmat);
761:   }
762:   MatSetOptionsPrefix(pmat, prefix);
763:   if (!wasSetup && pc->setfromoptionscalled) {
764:     /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
765:     KSPSetFromOptions(ksp);
766:   }
767:   return 0;
768: }

770: static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
771: {
772:   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
773:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
774:   PetscInt               i;

776:   if (bjac && bjac->pmat) {
777:     MatDestroyMatrices(jac->n_local, &bjac->pmat);
778:     if (pc->useAmat) MatDestroyMatrices(jac->n_local, &bjac->mat);
779:   }

781:   for (i = 0; i < jac->n_local; i++) {
782:     KSPReset(jac->ksp[i]);
783:     if (bjac && bjac->x) {
784:       VecDestroy(&bjac->x[i]);
785:       VecDestroy(&bjac->y[i]);
786:       ISDestroy(&bjac->is[i]);
787:     }
788:   }
789:   PetscFree(jac->l_lens);
790:   PetscFree(jac->g_lens);
791:   return 0;
792: }

794: static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
795: {
796:   PC_BJacobi            *jac  = (PC_BJacobi *)pc->data;
797:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
798:   PetscInt               i;

800:   PCReset_BJacobi_Multiblock(pc);
801:   if (bjac) {
802:     PetscFree2(bjac->x, bjac->y);
803:     PetscFree(bjac->starts);
804:     PetscFree(bjac->is);
805:   }
806:   PetscFree(jac->data);
807:   for (i = 0; i < jac->n_local; i++) KSPDestroy(&jac->ksp[i]);
808:   PetscFree(jac->ksp);
809:   PCDestroy_BJacobi(pc);
810:   return 0;
811: }

813: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
814: {
815:   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
816:   PetscInt           i, n_local = jac->n_local;
817:   KSPConvergedReason reason;

819:   for (i = 0; i < n_local; i++) {
820:     KSPSetUp(jac->ksp[i]);
821:     KSPGetConvergedReason(jac->ksp[i], &reason);
822:     if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
823:   }
824:   return 0;
825: }

827: static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc, Vec x, Vec y)
828: {
829:   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
830:   PetscInt               i, n_local = jac->n_local;
831:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
832:   PetscScalar           *yin;
833:   const PetscScalar     *xin;

835:   VecGetArrayRead(x, &xin);
836:   VecGetArray(y, &yin);
837:   for (i = 0; i < n_local; i++) {
838:     /*
839:        To avoid copying the subvector from x into a workspace we instead
840:        make the workspace vector array point to the subpart of the array of
841:        the global vector.
842:     */
843:     VecPlaceArray(bjac->x[i], xin + bjac->starts[i]);
844:     VecPlaceArray(bjac->y[i], yin + bjac->starts[i]);

846:     PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0);
847:     KSPSolve(jac->ksp[i], bjac->x[i], bjac->y[i]);
848:     KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]);
849:     PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0);

851:     VecResetArray(bjac->x[i]);
852:     VecResetArray(bjac->y[i]);
853:   }
854:   VecRestoreArrayRead(x, &xin);
855:   VecRestoreArray(y, &yin);
856:   return 0;
857: }

859: static PetscErrorCode PCApplySymmetricLeft_BJacobi_Multiblock(PC pc, Vec x, Vec y)
860: {
861:   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
862:   PetscInt               i, n_local = jac->n_local;
863:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
864:   PetscScalar           *yin;
865:   const PetscScalar     *xin;
866:   PC                     subpc;

868:   VecGetArrayRead(x, &xin);
869:   VecGetArray(y, &yin);
870:   for (i = 0; i < n_local; i++) {
871:     /*
872:        To avoid copying the subvector from x into a workspace we instead
873:        make the workspace vector array point to the subpart of the array of
874:        the global vector.
875:     */
876:     VecPlaceArray(bjac->x[i], xin + bjac->starts[i]);
877:     VecPlaceArray(bjac->y[i], yin + bjac->starts[i]);

879:     PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0);
880:     /* apply the symmetric left portion of the inner PC operator */
881:     /* note this by-passes the inner KSP and its options completely */
882:     KSPGetPC(jac->ksp[i], &subpc);
883:     PCApplySymmetricLeft(subpc, bjac->x[i], bjac->y[i]);
884:     PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0);

886:     VecResetArray(bjac->x[i]);
887:     VecResetArray(bjac->y[i]);
888:   }
889:   VecRestoreArrayRead(x, &xin);
890:   VecRestoreArray(y, &yin);
891:   return 0;
892: }

894: static PetscErrorCode PCApplySymmetricRight_BJacobi_Multiblock(PC pc, Vec x, Vec y)
895: {
896:   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
897:   PetscInt               i, n_local = jac->n_local;
898:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
899:   PetscScalar           *yin;
900:   const PetscScalar     *xin;
901:   PC                     subpc;

903:   VecGetArrayRead(x, &xin);
904:   VecGetArray(y, &yin);
905:   for (i = 0; i < n_local; i++) {
906:     /*
907:        To avoid copying the subvector from x into a workspace we instead
908:        make the workspace vector array point to the subpart of the array of
909:        the global vector.
910:     */
911:     VecPlaceArray(bjac->x[i], xin + bjac->starts[i]);
912:     VecPlaceArray(bjac->y[i], yin + bjac->starts[i]);

914:     PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0);
 915:     /* apply the symmetric right portion of the inner PC operator */
916:     /* note this by-passes the inner KSP and its options completely */
917:     KSPGetPC(jac->ksp[i], &subpc);
918:     PCApplySymmetricRight(subpc, bjac->x[i], bjac->y[i]);
919:     PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0);

921:     VecResetArray(bjac->x[i]);
922:     VecResetArray(bjac->y[i]);
923:   }
924:   VecRestoreArrayRead(x, &xin);
925:   VecRestoreArray(y, &yin);
926:   return 0;
927: }

929: static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc, Vec x, Vec y)
930: {
931:   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
932:   PetscInt               i, n_local = jac->n_local;
933:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
934:   PetscScalar           *yin;
935:   const PetscScalar     *xin;

937:   VecGetArrayRead(x, &xin);
938:   VecGetArray(y, &yin);
939:   for (i = 0; i < n_local; i++) {
940:     /*
941:        To avoid copying the subvector from x into a workspace we instead
942:        make the workspace vector array point to the subpart of the array of
943:        the global vector.
944:     */
945:     VecPlaceArray(bjac->x[i], xin + bjac->starts[i]);
946:     VecPlaceArray(bjac->y[i], yin + bjac->starts[i]);

948:     PetscLogEventBegin(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0);
949:     KSPSolveTranspose(jac->ksp[i], bjac->x[i], bjac->y[i]);
950:     KSPCheckSolve(jac->ksp[i], pc, bjac->y[i]);
951:     PetscLogEventEnd(PC_ApplyTransposeOnBlocks, jac->ksp[i], bjac->x[i], bjac->y[i], 0);

953:     VecResetArray(bjac->x[i]);
954:     VecResetArray(bjac->y[i]);
955:   }
956:   VecRestoreArrayRead(x, &xin);
957:   VecRestoreArray(y, &yin);
958:   return 0;
959: }

961: static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc, Mat mat, Mat pmat)
962: {
963:   PC_BJacobi            *jac = (PC_BJacobi *)pc->data;
964:   PetscInt               m, n_local, N, M, start, i;
965:   const char            *prefix;
966:   KSP                    ksp;
967:   Vec                    x, y;
968:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock *)jac->data;
969:   PC                     subpc;
970:   IS                     is;
971:   MatReuse               scall;
972:   VecType                vectype;

974:   MatGetLocalSize(pc->pmat, &M, &N);

976:   n_local = jac->n_local;

978:   if (pc->useAmat) {
979:     PetscBool same;
980:     PetscObjectTypeCompare((PetscObject)mat, ((PetscObject)pmat)->type_name, &same);
982:   }

984:   if (!pc->setupcalled) {
985:     scall = MAT_INITIAL_MATRIX;

987:     if (!jac->ksp) {
988:       pc->ops->reset               = PCReset_BJacobi_Multiblock;
989:       pc->ops->destroy             = PCDestroy_BJacobi_Multiblock;
990:       pc->ops->apply               = PCApply_BJacobi_Multiblock;
991:       pc->ops->matapply            = NULL;
992:       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Multiblock;
993:       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Multiblock;
994:       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Multiblock;
995:       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Multiblock;

997:       PetscNew(&bjac);
998:       PetscMalloc1(n_local, &jac->ksp);
999:       PetscMalloc2(n_local, &bjac->x, n_local, &bjac->y);
1000:       PetscMalloc1(n_local, &bjac->starts);

1002:       jac->data = (void *)bjac;
1003:       PetscMalloc1(n_local, &bjac->is);

1005:       for (i = 0; i < n_local; i++) {
1006:         KSPCreate(PETSC_COMM_SELF, &ksp);
1007:         KSPSetErrorIfNotConverged(ksp, pc->erroriffailure);
1008:         PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1);
1009:         KSPSetType(ksp, KSPPREONLY);
1010:         KSPGetPC(ksp, &subpc);
1011:         PCGetOptionsPrefix(pc, &prefix);
1012:         KSPSetOptionsPrefix(ksp, prefix);
1013:         KSPAppendOptionsPrefix(ksp, "sub_");

1015:         jac->ksp[i] = ksp;
1016:       }
1017:     } else {
1018:       bjac = (PC_BJacobi_Multiblock *)jac->data;
1019:     }

1021:     start = 0;
1022:     MatGetVecType(pmat, &vectype);
1023:     for (i = 0; i < n_local; i++) {
1024:       m = jac->l_lens[i];
1025:       /*
1026:       The reason we need to generate these vectors is to serve
1027:       as the right-hand side and solution vector for the solve on the
1028:       block. We do not need to allocate space for the vectors since
1029:       that is provided via VecPlaceArray() just before the call to
1030:       KSPSolve() on the block.

1032:       */
1033:       VecCreateSeq(PETSC_COMM_SELF, m, &x);
1034:       VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, NULL, &y);
1035:       VecSetType(x, vectype);
1036:       VecSetType(y, vectype);

1038:       bjac->x[i]      = x;
1039:       bjac->y[i]      = y;
1040:       bjac->starts[i] = start;

1042:       ISCreateStride(PETSC_COMM_SELF, m, start, 1, &is);
1043:       bjac->is[i] = is;

1045:       start += m;
1046:     }
1047:   } else {
1048:     bjac = (PC_BJacobi_Multiblock *)jac->data;
1049:     /*
1050:        Destroy the blocks from the previous iteration
1051:     */
1052:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1053:       MatDestroyMatrices(n_local, &bjac->pmat);
1054:       if (pc->useAmat) MatDestroyMatrices(n_local, &bjac->mat);
1055:       scall = MAT_INITIAL_MATRIX;
1056:     } else scall = MAT_REUSE_MATRIX;
1057:   }

1059:   MatCreateSubMatrices(pmat, n_local, bjac->is, bjac->is, scall, &bjac->pmat);
1060:   if (pc->useAmat) MatCreateSubMatrices(mat, n_local, bjac->is, bjac->is, scall, &bjac->mat);
1061:   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1062:      different boundary conditions for the submatrices than for the global problem) */
1063:   PCModifySubMatrices(pc, n_local, bjac->is, bjac->is, bjac->pmat, pc->modifysubmatricesP);

1065:   for (i = 0; i < n_local; i++) {
1066:     KSPGetOptionsPrefix(jac->ksp[i], &prefix);
1067:     if (pc->useAmat) {
1068:       KSPSetOperators(jac->ksp[i], bjac->mat[i], bjac->pmat[i]);
1069:       MatSetOptionsPrefix(bjac->mat[i], prefix);
1070:     } else {
1071:       KSPSetOperators(jac->ksp[i], bjac->pmat[i], bjac->pmat[i]);
1072:     }
1073:     MatSetOptionsPrefix(bjac->pmat[i], prefix);
1074:     if (pc->setfromoptionscalled) KSPSetFromOptions(jac->ksp[i]);
1075:   }
1076:   return 0;
1077: }

1079: /*
1080:       These are for a single block with multiple processes
1081: */
1082: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiproc(PC pc)
1083: {
1084:   PC_BJacobi        *jac    = (PC_BJacobi *)pc->data;
1085:   KSP                subksp = jac->ksp[0];
1086:   KSPConvergedReason reason;

1088:   KSPSetUp(subksp);
1089:   KSPGetConvergedReason(subksp, &reason);
1090:   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1091:   return 0;
1092: }

1094: static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1095: {
1096:   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1097:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;

1099:   VecDestroy(&mpjac->ysub);
1100:   VecDestroy(&mpjac->xsub);
1101:   MatDestroy(&mpjac->submats);
1102:   if (jac->ksp) KSPReset(jac->ksp[0]);
1103:   return 0;
1104: }

1106: static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1107: {
1108:   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1109:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;

1111:   PCReset_BJacobi_Multiproc(pc);
1112:   KSPDestroy(&jac->ksp[0]);
1113:   PetscFree(jac->ksp);
1114:   PetscSubcommDestroy(&mpjac->psubcomm);

1116:   PetscFree(mpjac);
1117:   PCDestroy_BJacobi(pc);
1118:   return 0;
1119: }

1121: static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc, Vec x, Vec y)
1122: {
1123:   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1124:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1125:   PetscScalar          *yarray;
1126:   const PetscScalar    *xarray;
1127:   KSPConvergedReason    reason;

1129:   /* place x's and y's local arrays into xsub and ysub */
1130:   VecGetArrayRead(x, &xarray);
1131:   VecGetArray(y, &yarray);
1132:   VecPlaceArray(mpjac->xsub, xarray);
1133:   VecPlaceArray(mpjac->ysub, yarray);

1135:   /* apply preconditioner on each matrix block */
1136:   PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0);
1137:   KSPSolve(jac->ksp[0], mpjac->xsub, mpjac->ysub);
1138:   KSPCheckSolve(jac->ksp[0], pc, mpjac->ysub);
1139:   PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], mpjac->xsub, mpjac->ysub, 0);
1140:   KSPGetConvergedReason(jac->ksp[0], &reason);
1141:   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;

1143:   VecResetArray(mpjac->xsub);
1144:   VecResetArray(mpjac->ysub);
1145:   VecRestoreArrayRead(x, &xarray);
1146:   VecRestoreArray(y, &yarray);
1147:   return 0;
1148: }

1150: static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc, Mat X, Mat Y)
1151: {
1152:   PC_BJacobi        *jac = (PC_BJacobi *)pc->data;
1153:   KSPConvergedReason reason;
1154:   Mat                sX, sY;
1155:   const PetscScalar *x;
1156:   PetscScalar       *y;
1157:   PetscInt           m, N, lda, ldb;

1159:   /* apply preconditioner on each matrix block */
1160:   MatGetLocalSize(X, &m, NULL);
1161:   MatGetSize(X, NULL, &N);
1162:   MatDenseGetLDA(X, &lda);
1163:   MatDenseGetLDA(Y, &ldb);
1164:   MatDenseGetArrayRead(X, &x);
1165:   MatDenseGetArrayWrite(Y, &y);
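        /* wrap the local arrays of X and Y in parallel dense matrices on the sub-KSP's communicator
           so that KSPMatSolve() can operate on them without copying the data */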
1166:   MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, (PetscScalar *)x, &sX);
1167:   MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]), m, PETSC_DECIDE, PETSC_DECIDE, N, y, &sY);
1168:   MatDenseSetLDA(sX, lda);
1169:   MatDenseSetLDA(sY, ldb);
1170:   PetscLogEventBegin(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0);
1171:   KSPMatSolve(jac->ksp[0], sX, sY);
1172:   KSPCheckSolve(jac->ksp[0], pc, NULL);
1173:   PetscLogEventEnd(PC_ApplyOnBlocks, jac->ksp[0], X, Y, 0);
1174:   MatDestroy(&sY);
1175:   MatDestroy(&sX);
1176:   MatDenseRestoreArrayWrite(Y, &y);
1177:   MatDenseRestoreArrayRead(X, &x);
1178:   KSPGetConvergedReason(jac->ksp[0], &reason);
1179:   if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR;
1180:   return 0;
1181: }

1183: static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
1184: {
1185:   PC_BJacobi           *jac   = (PC_BJacobi *)pc->data;
1186:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc *)jac->data;
1187:   PetscInt              m, n;
1188:   MPI_Comm              comm, subcomm = 0;
1189:   const char           *prefix;
1190:   PetscBool             wasSetup = PETSC_TRUE;
1191:   VecType               vectype;

1193:   PetscObjectGetComm((PetscObject)pc, &comm);
1195:   jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
1196:   if (!pc->setupcalled) {
1197:     wasSetup = PETSC_FALSE;
1198:     PetscNew(&mpjac);
1199:     jac->data = (void *)mpjac;

 1201:     /* initialize the data structure mpjac */
1202:     if (!jac->psubcomm) {
 1203:       /* Create default contiguous subcommunicators if the user does not provide them */
1204:       PetscSubcommCreate(comm, &jac->psubcomm);
1205:       PetscSubcommSetNumber(jac->psubcomm, jac->n);
1206:       PetscSubcommSetType(jac->psubcomm, PETSC_SUBCOMM_CONTIGUOUS);
1207:     }
1208:     mpjac->psubcomm = jac->psubcomm;
1209:     subcomm         = PetscSubcommChild(mpjac->psubcomm);

1211:     /* Get matrix blocks of pmat */
1212:     MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats);

 1214:     /* create a new KSP and PC that the processes in each subcomm share */
1215:     PetscMalloc1(1, &jac->ksp);
1216:     KSPCreate(subcomm, &jac->ksp[0]);
1217:     KSPSetErrorIfNotConverged(jac->ksp[0], pc->erroriffailure);
1218:     PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0], (PetscObject)pc, 1);
1219:     KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats);
1220:     KSPGetPC(jac->ksp[0], &mpjac->pc);

1222:     PCGetOptionsPrefix(pc, &prefix);
1223:     KSPSetOptionsPrefix(jac->ksp[0], prefix);
1224:     KSPAppendOptionsPrefix(jac->ksp[0], "sub_");
1225:     KSPGetOptionsPrefix(jac->ksp[0], &prefix);
1226:     MatSetOptionsPrefix(mpjac->submats, prefix);

1228:     /* create dummy vectors xsub and ysub */
1229:     MatGetLocalSize(mpjac->submats, &m, &n);
1230:     VecCreateMPIWithArray(subcomm, 1, n, PETSC_DECIDE, NULL, &mpjac->xsub);
1231:     VecCreateMPIWithArray(subcomm, 1, m, PETSC_DECIDE, NULL, &mpjac->ysub);
1232:     MatGetVecType(mpjac->submats, &vectype);
1233:     VecSetType(mpjac->xsub, vectype);
1234:     VecSetType(mpjac->ysub, vectype);

1236:     pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiproc;
1237:     pc->ops->reset         = PCReset_BJacobi_Multiproc;
1238:     pc->ops->destroy       = PCDestroy_BJacobi_Multiproc;
1239:     pc->ops->apply         = PCApply_BJacobi_Multiproc;
1240:     pc->ops->matapply      = PCMatApply_BJacobi_Multiproc;
1241:   } else { /* pc->setupcalled */
1242:     subcomm = PetscSubcommChild(mpjac->psubcomm);
1243:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1244:       /* destroy old matrix blocks, then get new matrix blocks */
1245:       if (mpjac->submats) MatDestroy(&mpjac->submats);
1246:       MatGetMultiProcBlock(pc->pmat, subcomm, MAT_INITIAL_MATRIX, &mpjac->submats);
1247:     } else {
1248:       MatGetMultiProcBlock(pc->pmat, subcomm, MAT_REUSE_MATRIX, &mpjac->submats);
1249:     }
1250:     KSPSetOperators(jac->ksp[0], mpjac->submats, mpjac->submats);
1251:   }

1253:   if (!wasSetup && pc->setfromoptionscalled) KSPSetFromOptions(jac->ksp[0]);
1254:   return 0;
1255: }