Actual source code: pcmpi.c

  1: /*
  2:     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
  3:     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

  5:     The user's sequential program may use OpenMP to compute the right hand side and matrix for the linear system

  7:     The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD

  9:     The resulting KSP and PC can only be controlled via the options database, though some common commands
 10:     could be passed through the server.

 12: */
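
      /*
         Rough picture of the control flow (a sketch for orientation only; PCMPIServerBegin() below is authoritative):

           all ranks:  PetscInitialize()  -- with -mpi_linear_solver_server this starts PCMPIServerBegin()
           rank 0:     returns from PCMPIServerBegin() with PETSC_COMM_WORLD redefined to PETSC_COMM_SELF and then
                       runs the user's sequential main program
           ranks > 0:  loop inside PCMPIServerBegin() receiving PCMPICommand broadcasts and servicing
                       PCMPICreate()/PCMPISetMat()/PCMPIUpdateMatValues()/PCMPISolve()/PCMPIDestroy()
                       until PCMPI_EXIT arrives from PetscFinalize() on rank 0
      */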
 13: #include <petsc/private/pcimpl.h>
 14: #include <petsc/private/kspimpl.h>
 15: #include <petsc.h>

 17: #define PC_MPI_MAX_RANKS  256
 18: #define PC_MPI_COMM_WORLD MPI_COMM_WORLD

 20: typedef struct {
 21:   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
 22:   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
 23:   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
 24:   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
 25:   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
 26: } PC_MPI;

 28: typedef enum {
 29:   PCMPI_EXIT, /* exit the PC server loop; the controlling sequential program is done */
 30:   PCMPI_CREATE,
 31:   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
 32:   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
 33:   PCMPI_SOLVE,
 34:   PCMPI_VIEW,
 35:   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
 36: } PCMPICommand;
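
      /*
         Each operation is initiated on MPI rank 0 by broadcasting a PCMPICommand and then calling the matching
         PCMPIXxx() routine with the PC; the server ranks receive the broadcast in PCMPIServerBegin() and call the
         same routine with a NULL PC. A minimal sketch of the rank 0 side (this mirrors PCApply_MPI() below):

             PCMPICommand request = PCMPI_SOLVE;
             MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);  -- wakes the loop in PCMPIServerBegin()
             PCMPISolve(pc, b, x);                                  -- server ranks run PCMPISolve(NULL, NULL, NULL)
      */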

 38: static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
 39: static PetscBool PCMPICommSet = PETSC_FALSE;
 40: static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
 41: static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;

 43: static PetscErrorCode PCMPICommsCreate(void)
 44: {
 45:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 46:   PetscMPIInt size, rank, i;

 48:   MPI_Comm_size(comm, &size);
 50:   MPI_Comm_rank(comm, &rank);
 51:   /* comm for size 1 is useful only for debugging */
 52:   for (i = 0; i < size; i++) {
 53:     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
 54:     MPI_Comm_split(comm, color, 0, &PCMPIComms[i]);
 55:     PCMPISolveCounts[i] = 0;
 56:     PCMPIKSPCounts[i]   = 0;
 57:     PCMPIIterations[i]  = 0;
 58:     PCMPISizes[i]       = 0;
 59:   }
 60:   PCMPICommSet = PETSC_TRUE;
 61:   return 0;
 62: }
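
      /*
         Illustration (not executed): with 4 ranks in PC_MPI_COMM_WORLD the split above produces nested
         subcommunicators, each containing the lowest-numbered ranks,

             PCMPIComms[0] = {0}          -- ranks 1,2,3 receive MPI_COMM_NULL
             PCMPIComms[1] = {0,1}        -- ranks 2,3   receive MPI_COMM_NULL
             PCMPIComms[2] = {0,1,2}      -- rank  3     receives MPI_COMM_NULL
             PCMPIComms[3] = {0,1,2,3}

         so that PCMPICreate() can later select the smallest communicator it considers large enough for the system.
      */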

 64: PetscErrorCode PCMPICommsDestroy(void)
 65: {
 66:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 67:   PetscMPIInt size, rank, i;

 69:   if (!PCMPICommSet) return 0;
 70:   MPI_Comm_size(comm, &size);
 71:   MPI_Comm_rank(comm, &rank);
 72:   for (i = 0; i < size; i++) {
 73:     if (PCMPIComms[i] != MPI_COMM_NULL) MPI_Comm_free(&PCMPIComms[i]);
 74:   }
 75:   PCMPICommSet = PETSC_FALSE;
 76:   return 0;
 77: }

 79: static PetscErrorCode PCMPICreate(PC pc)
 80: {
 81:   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
 82:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 83:   KSP         ksp;
 84:   PetscInt    N[2], mincntperrank = 0;
 85:   PetscMPIInt size;
 86:   Mat         sA;
 87:   char       *prefix;
 88:   PetscMPIInt len = 0;

 90:   if (!PCMPICommSet) PCMPICommsCreate();
 91:   MPI_Comm_size(comm, &size);
 92:   if (pc) {
 93:     if (size == 1) PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n");
 94:     PCGetOperators(pc, &sA, &sA);
 95:     MatGetSize(sA, &N[0], &N[1]);
 96:   }
 97:   MPI_Bcast(N, 2, MPIU_INT, 0, comm);

 99:   /* choose a suitable sized MPI_Comm for the problem to be solved on */
100:   if (km) mincntperrank = km->mincntperrank;
101:   MPI_Bcast(&mincntperrank, 1, MPIU_INT, 0, comm); /* mincntperrank is a PetscInt, so broadcast it with MPIU_INT */
102:   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
103:   if (comm == MPI_COMM_NULL) {
104:     ksp = NULL;
105:     return 0;
106:   }
107:   PetscLogStagePush(PCMPIStage);
108:   KSPCreate(comm, &ksp);
109:   PetscLogStagePop();
110:   MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm);
111:   if (pc) {
112:     size_t slen;

114:     MPI_Comm_size(comm, &size);
115:     PCMPIKSPCounts[size - 1]++;
116:     PCGetOptionsPrefix(pc, (const char **)&prefix);
117:     PetscStrlen(prefix, &slen);
118:     len = (PetscMPIInt)slen;
119:   }
120:   MPI_Bcast(&len, 1, MPI_INT, 0, comm);
121:   if (len) {
122:     if (!pc) PetscMalloc1(len + 1, &prefix);
123:     MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm);
124:     KSPSetOptionsPrefix(ksp, prefix);
125:   }
126:   KSPAppendOptionsPrefix(ksp, "mpi_");
127:   return 0;
128: }

130: static PetscErrorCode PCMPISetMat(PC pc)
131: {
132:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
133:   Mat                A;
134:   PetscInt           m, n, *ia, *ja, j, bs;
135:   Mat                sA;
136:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
137:   KSP                ksp;
138:   PetscLayout        layout;
139:   const PetscInt    *IA = NULL, *JA = NULL;
140:   const PetscInt    *range;
141:   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
142:   PetscScalar       *a;
143:   const PetscScalar *sa               = NULL;
144:   PetscInt           matproperties[7] = {0, 0, 0, 0, 0, 0, 0}; /* global rows, global columns, block size, then symmetric/Hermitian/SPD/structurally symmetric flags (0 = unknown, 1 = true, 2 = false) */

146:   MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
147:   if (!ksp) return 0;
148:   PetscObjectGetComm((PetscObject)ksp, &comm);
149:   if (pc) {
150:     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

152:     MPI_Comm_size(comm, &size);
153:     PCMPIMatCounts[size - 1]++;
154:     PCGetOperators(pc, &sA, &sA);
155:     MatGetSize(sA, &matproperties[0], &matproperties[1]);
156:     MatGetBlockSize(sA, &bs);
157:     matproperties[2] = bs;
158:     MatIsSymmetricKnown(sA, &isset, &issymmetric);
159:     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
160:     MatIsHermitianKnown(sA, &isset, &ishermitian);
161:     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
162:     MatIsSPDKnown(sA, &isset, &isspd);
163:     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
164:     MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric);
165:     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
166:   }
167:   MPI_Bcast(matproperties, 7, MPIU_INT, 0, comm);

169:   /* determine ownership ranges of matrix columns */
170:   PetscLayoutCreate(comm, &layout);
171:   PetscLayoutSetBlockSize(layout, matproperties[2]);
172:   PetscLayoutSetSize(layout, matproperties[1]);
173:   PetscLayoutSetUp(layout);
174:   PetscLayoutGetLocalSize(layout, &n);
175:   PetscLayoutDestroy(&layout);

177:   /* determine ownership ranges of matrix rows */
178:   PetscLayoutCreate(comm, &layout);
179:   PetscLayoutSetBlockSize(layout, matproperties[2]);
180:   PetscLayoutSetSize(layout, matproperties[0]);
181:   PetscLayoutSetUp(layout);
182:   PetscLayoutGetLocalSize(layout, &m);

184:   /* copy over the matrix nonzero structure and values */
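        /*
           Worked illustration of the partitioning computed below (for exposition only): for a 4 x 4 MATSEQAIJ with
           row offsets IA = {0,2,4,7,9} distributed over size = 2 ranks with row ranges {0,2,4},

               sendcounti = {3,3}                              -- each rank receives range[i+1]-range[i]+1 row offsets
               NZ         = {IA[2]-IA[0], IA[4]-IA[2]} = {4,5}  -- nonzeros owned by each rank
               displi     = {0,2},  NZdispl = {0,4}             -- starting offsets for the MPI_Scatterv() calls
        */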
185:   if (pc) {
186:     NZ      = km->NZ;
187:     NZdispl = km->NZdispl;
188:     PetscLayoutGetRanges(layout, &range);
189:     MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL);
190:     for (i = 0; i < size; i++) {
191:       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
192:       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
193:     }
194:     displi[0]  = 0;
195:     NZdispl[0] = 0;
196:     for (j = 1; j < size; j++) {
197:       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
198:       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
199:     }
200:     MatSeqAIJGetArrayRead(sA, &sa);
201:   }
202:   PetscLayoutDestroy(&layout);
203:   MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm);

205:   PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a);
206:   MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm);
207:   MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm);
208:   MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm);

210:   if (pc) {
211:     MatSeqAIJRestoreArrayRead(sA, &sa);
212:     MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL);
213:   }

215:   for (j = 1; j < n + 1; j++) ia[j] -= ia[0]; /* convert this rank's scattered global row offsets to local offsets that start at zero */
216:   ia[0] = 0;
217:   PetscLogStagePush(PCMPIStage);
218:   MatCreateMPIAIJWithArrays(comm, m, n, matproperties[0], matproperties[1], ia, ja, a, &A);
219:   MatSetBlockSize(A, matproperties[2]);
220:   MatSetOptionsPrefix(A, "mpi_");
221:   if (matproperties[3]) MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE);
222:   if (matproperties[4]) MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE);
223:   if (matproperties[5]) MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE);
224:   if (matproperties[6]) MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE);

226:   PetscFree3(ia, ja, a);
227:   KSPSetOperators(ksp, A, A);
228:   if (!ksp->vec_sol) MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs);
229:   PetscLogStagePop();
230:   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
231:     const PetscInt *range;

233:     VecGetOwnershipRanges(ksp->vec_sol, &range);
234:     for (i = 0; i < size; i++) {
235:       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
236:       km->displ[i]     = (PetscMPIInt)range[i];
237:     }
238:   }
239:   MatDestroy(&A);
240:   KSPSetFromOptions(ksp);
241:   return 0;
242: }

244: static PetscErrorCode PCMPIUpdateMatValues(PC pc)
245: {
246:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
247:   KSP                ksp;
248:   Mat                sA, A;
249:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
250:   PetscScalar       *a;
251:   PetscCount         nz;
252:   const PetscScalar *sa = NULL;
253:   PetscMPIInt        size;
254:   PetscInt           matproperties[4] = {0, 0, 0, 0}; /* symmetric/Hermitian/SPD/structurally symmetric flags (0 = unknown, 1 = true, 2 = false) */

256:   if (pc) {
257:     PCGetOperators(pc, &sA, &sA);
258:     MatSeqAIJGetArrayRead(sA, &sa);
259:   }
260:   MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
261:   if (!ksp) return 0;
262:   PetscObjectGetComm((PetscObject)ksp, &comm);
263:   MPI_Comm_size(comm, &size);
264:   PCMPIMatCounts[size - 1]++;
265:   KSPGetOperators(ksp, NULL, &A);
266:   MatMPIAIJGetNumberNonzeros(A, &nz);
267:   PetscMalloc1(nz, &a);
268:   MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm);
269:   if (pc) {
270:     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

272:     MatSeqAIJRestoreArrayRead(sA, &sa);

274:     MatIsSymmetricKnown(sA, &isset, &issymmetric);
275:     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
276:     MatIsHermitianKnown(sA, &isset, &ishermitian);
277:     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
278:     MatIsSPDKnown(sA, &isset, &isspd);
279:     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
280:     MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric);
281:     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
282:   }
283:   MatUpdateMPIAIJWithArray(A, a);
284:   PetscFree(a);
285:   MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm);
286:   /* if any of these properties was previously set and is now not set, this will result in incorrect properties in A since there is no way to unset a property */
287:   if (matproperties[0]) MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE);
288:   if (matproperties[1]) MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE);
289:   if (matproperties[2]) MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE);
290:   if (matproperties[3]) MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE);
291:   return 0;
292: }

294: static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
295: {
296:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
297:   KSP                ksp;
298:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
299:   const PetscScalar *sb   = NULL, *x;
300:   PetscScalar       *b, *sx = NULL;
301:   PetscInt           its, n;
302:   PetscMPIInt        size;

304:   MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
305:   if (!ksp) return 0;
306:   PetscObjectGetComm((PetscObject)ksp, &comm);

308:   /* TODO: optimize code to not require building counts/displ every time */

310:   /* scatterv rhs */
311:   MPI_Comm_size(comm, &size);
312:   if (pc) {
313:     PetscInt N;

315:     PCMPISolveCounts[size - 1]++;
316:     MatGetSize(pc->pmat, &N, NULL);
318:     PCMPISizes[size - 1] += N;
319:     VecGetArrayRead(B, &sb);
320:   }
321:   VecGetLocalSize(ksp->vec_rhs, &n);
322:   VecGetArray(ksp->vec_rhs, &b);
323:   MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm);
324:   VecRestoreArray(ksp->vec_rhs, &b);
325:   if (pc) VecRestoreArrayRead(B, &sb);

327:   PetscLogStagePush(PCMPIStage);
328:   KSPSolve(ksp, NULL, NULL);
329:   PetscLogStagePop();
330:   KSPGetIterationNumber(ksp, &its);
331:   PCMPIIterations[size - 1] += its;

333:   /* gather solution */
334:   VecGetArrayRead(ksp->vec_sol, &x);
335:   if (pc) VecGetArray(X, &sx);
336:   MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm);
337:   if (pc) VecRestoreArray(X, &sx);
338:   VecRestoreArrayRead(ksp->vec_sol, &x);
339:   return 0;
340: }

342: static PetscErrorCode PCMPIDestroy(PC pc)
343: {
344:   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
345:   KSP      ksp;
346:   MPI_Comm comm = PC_MPI_COMM_WORLD;

348:   MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
349:   if (!ksp) return 0;
350:   KSPDestroy(&ksp);
351:   return 0;
352: }

354: /*@C
355:      PCMPIServerBegin - starts a server on the MPI processes with rank != 0 that waits to process requests for
356:      parallel `KSP` solves and the management of parallel `KSP` objects.

358:      Logically collective on all MPI ranks except 0

360:    Options Database Keys:
361: +   -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
362: -   -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server

364:      Note:
365:       This is normally started automatically in `PetscInitialize()` when the option `-mpi_linear_solver_server` is provided

367:      Developer Notes:
368:        When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
369:        written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the other ranks
370:        (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

372:        Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

374:      Level: developer

376: .seealso: `PCMPIServerEnd()`, `PCMPI`
377: @*/
378: PetscErrorCode PCMPIServerBegin(void)
379: {
380:   PetscMPIInt rank;

382:   PetscInfo(NULL, "Starting MPI Linear Solver Server\n");
383:   if (PetscDefined(USE_SINGLE_LIBRARY)) {
384:     VecInitializePackage();
385:     MatInitializePackage();
386:     DMInitializePackage();
387:     PCInitializePackage();
388:     KSPInitializePackage();
389:     SNESInitializePackage();
390:     TSInitializePackage();
391:     TaoInitializePackage();
392:   }

394:   MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank);
395:   if (rank == 0) {
396:     PETSC_COMM_WORLD = PETSC_COMM_SELF;
397:     return 0;
398:   }

400:   while (PETSC_TRUE) {
401:     PCMPICommand request = PCMPI_CREATE;
402:     MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD);
403:     switch (request) {
404:     case PCMPI_CREATE:
405:       PCMPICreate(NULL);
406:       break;
407:     case PCMPI_SET_MAT:
408:       PCMPISetMat(NULL);
409:       break;
410:     case PCMPI_UPDATE_MAT_VALUES:
411:       PCMPIUpdateMatValues(NULL);
412:       break;
413:     case PCMPI_VIEW:
414:       // PCMPIView(NULL);
415:       break;
416:     case PCMPI_SOLVE:
417:       PCMPISolve(NULL, NULL, NULL);
418:       break;
419:     case PCMPI_DESTROY:
420:       PCMPIDestroy(NULL);
421:       break;
422:     case PCMPI_EXIT:
423:       PetscFinalize();
424:       exit(0); /* not sure if this is a good idea, but cannot return because it will run the user's main program */
425:       break;
426:     default:
427:       break;
428:     }
429:   }
430:   return 0;
431: }

433: /*@C
434:      PCMPIServerEnd - ends the server that runs on the MPI processes with rank != 0 waiting to process requests for
435:      parallel `KSP` solves and the management of parallel `KSP` objects.

437:      Logically collective on all MPI ranks except 0

439:      Note:
440:       This is normally ended automatically in `PetscFinalize()` when the option `-mpi_linear_solver_server` is provided

442:      Level: developer

444: .seealso: `PCMPIServerBegin()`, `PCMPI`
445: @*/
446: PetscErrorCode PCMPIServerEnd(void)
447: {
448:   PCMPICommand request = PCMPI_EXIT;

450:   if (PetscGlobalRank == 0) {
451:     PetscViewer       viewer = NULL;
452:     PetscViewerFormat format;

454:     MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD);
455:     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
456:     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
457:     PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about the systems solved with the server", "PCMPI", &viewer, &format, NULL);
458:     PetscOptionsEnd();
459:     if (viewer) {
460:       PetscBool isascii;

462:       PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
463:       if (isascii) {
464:         PetscMPIInt size;
465:         PetscMPIInt i;

467:         MPI_Comm_size(PETSC_COMM_WORLD, &size);
468:         PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n");
469:         PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n");
470:         if (PCMPIKSPCountsSeq) {
471:           PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq);
472:         }
473:         for (i = 0; i < size; i++) {
474:           if (PCMPIKSPCounts[i]) {
475:             PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]);
476:           }
477:         }
478:       }
479:       PetscViewerDestroy(&viewer);
480:     }
481:   }
482:   PCMPICommsDestroy();
483:   return 0;
484: }

486: /*
487:     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
488:     because, for example, the problem is small. This version is more efficient because it does not require copying any data
489: */
490: static PetscErrorCode PCSetUp_Seq(PC pc)
491: {
492:   PC_MPI     *km = (PC_MPI *)pc->data;
493:   Mat         sA;
494:   const char *prefix;

496:   PCGetOperators(pc, NULL, &sA);
497:   PCGetOptionsPrefix(pc, &prefix);
498:   KSPCreate(PETSC_COMM_SELF, &km->ksps[0]);
499:   KSPSetOptionsPrefix(km->ksps[0], prefix);
500:   KSPAppendOptionsPrefix(km->ksps[0], "mpi_");
501:   KSPSetOperators(km->ksps[0], sA, sA);
502:   KSPSetFromOptions(km->ksps[0]);
503:   KSPSetUp(km->ksps[0]);
504:   PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n");
505:   PCMPIKSPCountsSeq++;
506:   return 0;
507: }

509: static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
510: {
511:   PC_MPI  *km = (PC_MPI *)pc->data;
512:   PetscInt its, n;
513:   Mat      A;

515:   KSPSolve(km->ksps[0], b, x);
516:   KSPGetIterationNumber(km->ksps[0], &its);
517:   PCMPISolveCountsSeq++;
518:   PCMPIIterationsSeq += its;
519:   KSPGetOperators(km->ksps[0], NULL, &A);
520:   MatGetSize(A, &n, NULL);
521:   PCMPISizesSeq += n;
522:   return 0;
523: }

525: static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
526: {
527:   PC_MPI     *km = (PC_MPI *)pc->data;
528:   const char *prefix;

530:   PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n");
531:   PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank);
532:   PCGetOptionsPrefix(pc, &prefix);
533:   if (prefix) PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix);
534:   else PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n");
535:   PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n");
536:   return 0;
537: }

539: static PetscErrorCode PCDestroy_Seq(PC pc)
540: {
541:   PC_MPI *km = (PC_MPI *)pc->data;

543:   KSPDestroy(&km->ksps[0]);
544:   PetscFree(pc->data);
545:   return 0;
546: }

548: /*
549:      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
550:      right hand side to the parallel PC
551: */
552: static PetscErrorCode PCSetUp_MPI(PC pc)
553: {
554:   PC_MPI      *km = (PC_MPI *)pc->data;
555:   PetscMPIInt  rank, size;
556:   PCMPICommand request;
557:   PetscBool    newmatrix = PETSC_FALSE;

559:   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
561:   MPI_Comm_size(MPI_COMM_WORLD, &size);

563:   if (!pc->setupcalled) {
564:     if (!km->alwaysuseserver) {
565:       PetscInt n;
566:       Mat      sA;
567:       /* short circuit for small systems */
568:       PCGetOperators(pc, &sA, &sA);
569:       MatGetSize(sA, &n, NULL);
570:       if (n < 2 * km->mincntperrank - 1 || size == 1) {
571:         pc->ops->setup   = NULL;
572:         pc->ops->apply   = PCApply_Seq;
573:         pc->ops->destroy = PCDestroy_Seq;
574:         pc->ops->view    = PCView_Seq;
575:         PCSetUp_Seq(pc);
576:         return 0;
577:       }
578:     }

580:     request = PCMPI_CREATE;
581:     MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
582:     PCMPICreate(pc);
583:     newmatrix = PETSC_TRUE;
584:   }
585:   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

587:   if (newmatrix) {
588:     PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n");
589:     request = PCMPI_SET_MAT;
590:     MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
591:     PCMPISetMat(pc);
592:   } else {
593:     PetscInfo((PetscObject)pc, "Matrix has only changed nozero values\n");
594:     request = PCMPI_UPDATE_MAT_VALUES;
595:     MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
596:     PCMPIUpdateMatValues(pc);
597:   }
598:   return 0;
599: }

601: static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
602: {
603:   PCMPICommand request = PCMPI_SOLVE;

605:   MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
606:   PCMPISolve(pc, b, x);
607:   return 0;
608: }

610: PetscErrorCode PCDestroy_MPI(PC pc)
611: {
612:   PCMPICommand request = PCMPI_DESTROY;

614:   MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
615:   PCMPIDestroy(pc);
616:   PetscFree(pc->data);
617:   return 0;
618: }

620: /*
621:      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
622: */
623: PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
624: {
625:   PC_MPI     *km = (PC_MPI *)pc->data;
626:   MPI_Comm    comm;
627:   PetscMPIInt size;
628:   const char *prefix;

630:   PetscObjectGetComm((PetscObject)km->ksps[0], &comm);
631:   MPI_Comm_size(comm, &size);
632:   PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size);
633:   PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank);
634:   PCGetOptionsPrefix(pc, &prefix);
635:   if (prefix) PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix);
636:   else PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n");
637:   PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n");
638:   return 0;
639: }

641: PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
642: {
643:   PC_MPI *km = (PC_MPI *)pc->data;

645:   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
646:   PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL);
647:   PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL);
648:   PetscOptionsHeadEnd();
649:   return 0;
650: }

652: /*MC
653:      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

655:    Level: beginner

657:    Options Database Keys:
658: +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
659: .  -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
660: .  -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
661: -  -pc_mpi_always_use_server - use the server solver code even if the particular system is solved on only one process; this option is only for debugging and testing purposes

663:    Notes:
664:    The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_

666:    The MPI linear solver server is not intended to scale user code to extremely large numbers of MPI ranks, but it should give reasonable speedup for
667:    roughly 4 to 8 MPI ranks, depending on the linear system being solved, the solver algorithm, and the hardware.

669:    It can be particularly useful for user OpenMP code or potentially user GPU code.

671:    When the program is run with a single MPI rank this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix -mpi_)
672:    and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.

674:    The solver options for the `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
675:    because they are not the actual solver objects.

677:    When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
678:    stages will be confusing since the event times are only recorded on the 0th MPI rank; thus the percentages of time in the events will be misleading.

680: .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
681: M*/
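
      /*
         A minimal usage sketch (hedged; the matrix, vectors, and the chosen inner solver options are illustrative
         placeholders, and -ksp_type preonly is just one reasonable pairing). The operator is assumed to be a
         MATSEQAIJ assembled by the sequential user code on rank 0, since PCMPISetMat() above accesses it with
         MatSeqAIJGetArrayRead()/MatGetRowIJ().

             mpiexec -n 4 ./app -mpi_linear_solver_server -ksp_type preonly -pc_type mpi \
                     -mpi_ksp_type cg -mpi_pc_type gamg -mpi_linear_solver_server_view

             KSP ksp;
             KSPCreate(PETSC_COMM_WORLD, &ksp);  -- on rank 0 PETSC_COMM_WORLD has been redefined to PETSC_COMM_SELF
             KSPSetOperators(ksp, A, A);         -- A, b, and x live entirely on rank 0
             KSPSetFromOptions(ksp);             -- picks up -ksp_type preonly -pc_type mpi
             KSPSolve(ksp, b, x);                -- the actual solve runs on the server ranks
             KSPDestroy(&ksp);
      */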
682: PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
683: {
684:   PC_MPI *km;

686:   PetscNew(&km);
687:   pc->data = (void *)km;

689:   km->mincntperrank = 10000;

691:   pc->ops->setup          = PCSetUp_MPI;
692:   pc->ops->apply          = PCApply_MPI;
693:   pc->ops->destroy        = PCDestroy_MPI;
694:   pc->ops->view           = PCView_MPI;
695:   pc->ops->setfromoptions = PCSetFromOptions_MPI;
696:   return 0;
697: }