Actual source code: pcmpi.c
1: /*
2: This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3: It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
5: That (sequential) program may use OpenMP to compute the right-hand side and matrix for the linear system.
7: The code uses MPI_COMM_WORLD below, but perhaps it should be PETSC_COMM_WORLD.
9: The resulting KSP and PC can only be controlled via the options database, though some common commands
10: could be passed through the server.
12: */
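/*
   Illustrative usage sketch (editorial addition, not part of the PETSc source): one way a sequential
   application might drive this server when launched with several MPI ranks and the
   -mpi_linear_solver_server option. Selecting this preconditioner with -pc_type mpi on the user-level
   KSP is an assumption here; everything else is the ordinary sequential PETSc KSP API.

       #include <petscksp.h>
       int main(int argc, char **argv)
       {
         Mat A;       // sequential AIJ matrix assembled entirely on rank 0
         Vec b, x;
         KSP ksp;

         PetscInitialize(&argc, &argv, NULL, NULL); // ranks != 0 enter PCMPIServerBegin() here
         // ... create A, b, and x on PETSC_COMM_SELF and assemble them (possibly using OpenMP) ...
         KSPCreate(PETSC_COMM_SELF, &ksp);
         KSPSetOperators(ksp, A, A);
         KSPSetFromOptions(ksp);                    // -pc_type mpi selects this preconditioner
         KSPSolve(ksp, b, x);                       // matrix and rhs are distributed to the server ranks
         KSPDestroy(&ksp);
         PetscFinalize();                           // on rank 0 this tells the server ranks to exit
         return 0;
       }
*/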
13: #include <petsc/private/pcimpl.h>
14: #include <petsc/private/kspimpl.h>
15: #include <petsc.h>
17: #define PC_MPI_MAX_RANKS 256
18: #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
20: typedef struct {
21: KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22: PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23: PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
24: PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25: PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26: } PC_MPI;
28: typedef enum {
29: PCMPI_EXIT, /* exit the PC server loop; the controlling sequential program is done */
30: PCMPI_CREATE,
31: PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */
32: PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33: PCMPI_SOLVE,
34: PCMPI_VIEW,
35: PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36: } PCMPICommand;
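/*
   Protocol note (editorial addition): every operation is initiated by MPI rank 0, which broadcasts a
   PCMPICommand on PC_MPI_COMM_WORLD and then calls the corresponding PCMPIXxx() routine with its PC,
   while the other ranks, waiting in the loop inside PCMPIServerBegin(), receive the same broadcast and
   call the matching routine with a NULL PC. For example, the rank-0 side of a solve (see PCApply_MPI())
   is

       PCMPICommand request = PCMPI_SOLVE;
       MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
       PCMPISolve(pc, b, x);
*/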
38: static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS];
39: static PetscBool PCMPICommSet = PETSC_FALSE;
40: static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41: static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
43: static PetscErrorCode PCMPICommsCreate(void)
44: {
45: MPI_Comm comm = PC_MPI_COMM_WORLD;
46: PetscMPIInt size, rank, i;
48: MPI_Comm_size(comm, &size);
50: MPI_Comm_rank(comm, &rank);
51: /* comm for size 1 is useful only for debugging */
52: for (i = 0; i < size; i++) {
53: PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
54: MPI_Comm_split(comm, color, 0, &PCMPIComms[i]);
55: PCMPISolveCounts[i] = 0;
56: PCMPIKSPCounts[i] = 0;
57: PCMPIIterations[i] = 0;
58: PCMPISizes[i] = 0;
59: }
60: PCMPICommSet = PETSC_TRUE;
61: return 0;
62: }
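/*
   Editorial note: after PCMPICommsCreate(), PCMPIComms[i] is a communicator containing world ranks
   0..i (ranks larger than i receive MPI_COMM_NULL from the MPI_Comm_split() above). For example, with
   a world size of 4, PCMPIComms[1] contains ranks {0,1} and PCMPIComms[3] contains all four ranks.
   PCMPICreate() later picks the smallest of these communicators that still gives each rank enough work.
*/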
64: PetscErrorCode PCMPICommsDestroy(void)
65: {
66: MPI_Comm comm = PC_MPI_COMM_WORLD;
67: PetscMPIInt size, rank, i;
69: if (!PCMPICommSet) return 0;
70: MPI_Comm_size(comm, &size);
71: MPI_Comm_rank(comm, &rank);
72: for (i = 0; i < size; i++) {
73: if (PCMPIComms[i] != MPI_COMM_NULL) MPI_Comm_free(&PCMPIComms[i]);
74: }
75: PCMPICommSet = PETSC_FALSE;
76: return 0;
77: }
79: static PetscErrorCode PCMPICreate(PC pc)
80: {
81: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
82: MPI_Comm comm = PC_MPI_COMM_WORLD;
83: KSP ksp;
84: PetscInt N[2], mincntperrank = 0;
85: PetscMPIInt size;
86: Mat sA;
87: char *prefix;
88: PetscMPIInt len = 0;
90: if (!PCMPICommSet) PCMPICommsCreate();
91: MPI_Comm_size(comm, &size);
92: if (pc) {
93: if (size == 1) PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run; this will be less efficient than not using this type\n");
94: PCGetOperators(pc, &sA, &sA);
95: MatGetSize(sA, &N[0], &N[1]);
96: }
97: MPI_Bcast(N, 2, MPIU_INT, 0, comm);
99: /* choose a suitably sized MPI_Comm for the problem to be solved on */
100: if (km) mincntperrank = km->mincntperrank;
101: MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm);
102: comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
103: if (comm == MPI_COMM_NULL) {
104: ksp = NULL;
105: return 0;
106: }
107: PetscLogStagePush(PCMPIStage);
108: KSPCreate(comm, &ksp);
109: PetscLogStagePop();
110: MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm);
111: if (pc) {
112: size_t slen;
114: MPI_Comm_size(comm, &size);
115: PCMPIKSPCounts[size - 1]++;
116: PCGetOptionsPrefix(pc, (const char **)&prefix);
117: PetscStrlen(prefix, &slen);
118: len = (PetscMPIInt)slen;
119: }
120: MPI_Bcast(&len, 1, MPI_INT, 0, comm);
121: if (len) {
122: if (!pc) PetscMalloc1(len + 1, &prefix);
123: MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm);
124: KSPSetOptionsPrefix(ksp, prefix);
125: }
126: KSPAppendOptionsPrefix(ksp, "mpi_");
127: return 0;
128: }
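/*
   Editorial note: the communicator chosen above has min(size, max(1, N/mincntperrank)) ranks, where N
   is the global number of rows and mincntperrank is set with -pc_mpi_minimum_count_per_rank (default
   10000, see PCCreate_MPI()). For example, with the default and 8 world ranks, a 1,000,000 row system
   uses all 8 ranks (min(8, 100)), while a 25,000 row system uses only PCMPIComms[1], i.e. ranks 0 and 1
   (min(8, 2)).
*/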
130: static PetscErrorCode PCMPISetMat(PC pc)
131: {
132: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
133: Mat A;
134: PetscInt m, n, *ia, *ja, j, bs;
135: Mat sA;
136: MPI_Comm comm = PC_MPI_COMM_WORLD;
137: KSP ksp;
138: PetscLayout layout;
139: const PetscInt *IA = NULL, *JA = NULL;
140: const PetscInt *range;
141: PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
142: PetscScalar *a;
143: const PetscScalar *sa = NULL;
144: PetscInt matproperties[7] = {0, 0, 0, 0, 0, 0, 0};
146: MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
147: if (!ksp) return 0;
148: PetscObjectGetComm((PetscObject)ksp, &comm);
149: if (pc) {
150: PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
152: MPI_Comm_size(comm, &size);
153: PCMPIMatCounts[size - 1]++;
154: PCGetOperators(pc, &sA, &sA);
155: MatGetSize(sA, &matproperties[0], &matproperties[1]);
156: MatGetBlockSize(sA, &bs);
157: matproperties[2] = bs;
158: MatIsSymmetricKnown(sA, &isset, &issymmetric);
159: matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
160: MatIsHermitianKnown(sA, &isset, &ishermitian);
161: matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
162: MatIsSPDKnown(sA, &isset, &isspd);
163: matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
164: MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric);
165: matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
166: }
167: MPI_Bcast(matproperties, 7, MPIU_INT, 0, comm);
169: /* determine ownership ranges of matrix columns */
170: PetscLayoutCreate(comm, &layout);
171: PetscLayoutSetBlockSize(layout, matproperties[2]);
172: PetscLayoutSetSize(layout, matproperties[1]);
173: PetscLayoutSetUp(layout);
174: PetscLayoutGetLocalSize(layout, &n);
175: PetscLayoutDestroy(&layout);
177: /* determine ownership ranges of matrix rows */
178: PetscLayoutCreate(comm, &layout);
179: PetscLayoutSetBlockSize(layout, matproperties[2]);
180: PetscLayoutSetSize(layout, matproperties[0]);
181: PetscLayoutSetUp(layout);
182: PetscLayoutGetLocalSize(layout, &m);
184: /* copy over the matrix nonzero structure and values */
185: if (pc) {
186: NZ = km->NZ;
187: NZdispl = km->NZdispl;
188: PetscLayoutGetRanges(layout, &range);
189: MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL);
190: for (i = 0; i < size; i++) {
191: sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
192: NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
193: }
194: displi[0] = 0;
195: NZdispl[0] = 0;
196: for (j = 1; j < size; j++) {
197: displi[j] = displi[j - 1] + sendcounti[j - 1] - 1;
198: NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
199: }
200: MatSeqAIJGetArrayRead(sA, &sa);
201: }
202: PetscLayoutDestroy(&layout);
203: MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm);
205: PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a);
206: MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm);
207: MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm);
208: MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm);
210: if (pc) {
211: MatSeqAIJRestoreArrayRead(sA, &sa);
212: MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL);
213: }
215: for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
216: ia[0] = 0;
217: PetscLogStagePush(PCMPIStage);
218: MatCreateMPIAIJWithArrays(comm, m, n, matproperties[0], matproperties[1], ia, ja, a, &A);
219: MatSetBlockSize(A, matproperties[2]);
220: MatSetOptionsPrefix(A, "mpi_");
221: if (matproperties[3]) MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE);
222: if (matproperties[4]) MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE);
223: if (matproperties[5]) MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE);
224: if (matproperties[6]) MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE);
226: PetscFree3(ia, ja, a);
227: KSPSetOperators(ksp, A, A);
228: if (!ksp->vec_sol) MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs);
229: PetscLogStagePop();
230: if (pc) { /* needed for scatterv/gatherv of rhs and solution */
231: const PetscInt *range;
233: VecGetOwnershipRanges(ksp->vec_sol, &range);
234: for (i = 0; i < size; i++) {
235: km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
236: km->displ[i] = (PetscMPIInt)range[i];
237: }
238: }
239: MatDestroy(&A);
240: KSPSetFromOptions(ksp);
241: return 0;
242: }
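/*
   Editorial note: a small worked example of the CSR scatter in PCMPISetMat(). Suppose the global row
   pointer is IA = {0,2,5,7,9} (4 rows, 9 nonzeros) and two ranks own 2 rows each, so range = {0,2,4}.
   Then sendcounti = {3,3} and displi = {0,2}: the row-pointer segments overlap by one entry, rank 0
   receives {0,2,5} and rank 1 receives {5,7,9}. NZ = {IA[2]-IA[0], IA[4]-IA[2]} = {5,4} gives the counts
   for scattering ja and a. After the rebasing loop (ia[j] -= ia[0]; ia[0] = 0) rank 1 holds the local
   row pointer {0,2,4}, which is what MatCreateMPIAIJWithArrays() expects.
*/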
244: static PetscErrorCode PCMPIUpdateMatValues(PC pc)
245: {
246: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
247: KSP ksp;
248: Mat sA, A;
249: MPI_Comm comm = PC_MPI_COMM_WORLD;
250: PetscScalar *a;
251: PetscCount nz;
252: const PetscScalar *sa = NULL;
253: PetscMPIInt size;
254: PetscInt matproperties[4] = {0, 0, 0, 0};
256: if (pc) {
257: PCGetOperators(pc, &sA, &sA);
258: MatSeqAIJGetArrayRead(sA, &sa);
259: }
260: MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
261: if (!ksp) return 0;
262: PetscObjectGetComm((PetscObject)ksp, &comm);
263: MPI_Comm_size(comm, &size);
264: PCMPIMatCounts[size - 1]++;
265: KSPGetOperators(ksp, NULL, &A);
266: MatMPIAIJGetNumberNonzeros(A, &nz);
267: PetscMalloc1(nz, &a);
268: MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm);
269: if (pc) {
270: PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
272: MatSeqAIJRestoreArrayRead(sA, &sa);
274: MatIsSymmetricKnown(sA, &isset, &issymmetric);
275: matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
276: MatIsHermitianKnown(sA, &isset, &ishermitian);
277: matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
278: MatIsSPDKnown(sA, &isset, &isspd);
279: matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
280: MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric);
281: matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
282: }
283: MatUpdateMPIAIJWithArray(A, a);
284: PetscFree(a);
285: MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm);
286: /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
287: if (matproperties[0]) MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE);
288: if (matproperties[1]) MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE);
289: if (matproperties[2]) MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE);
290: if (matproperties[3]) MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE);
291: return 0;
292: }
294: static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
295: {
296: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
297: KSP ksp;
298: MPI_Comm comm = PC_MPI_COMM_WORLD;
299: const PetscScalar *sb = NULL, *x;
300: PetscScalar *b, *sx = NULL;
301: PetscInt its, n;
302: PetscMPIInt size;
304: MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
305: if (!ksp) return 0;
306: PetscObjectGetComm((PetscObject)ksp, &comm);
308: /* TODO: optimize code to not require building counts/displ every time */
310: /* scatterv rhs */
311: MPI_Comm_size(comm, &size);
312: if (pc) {
313: PetscInt N;
315: PCMPISolveCounts[size - 1]++;
316: MatGetSize(pc->pmat, &N, NULL);
318: PCMPISizes[size - 1] += N;
319: VecGetArrayRead(B, &sb);
320: }
321: VecGetLocalSize(ksp->vec_rhs, &n);
322: VecGetArray(ksp->vec_rhs, &b);
323: MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm);
324: VecRestoreArray(ksp->vec_rhs, &b);
325: if (pc) VecRestoreArrayRead(B, &sb);
327: PetscLogStagePush(PCMPIStage);
328: KSPSolve(ksp, NULL, NULL);
329: PetscLogStagePop();
330: KSPGetIterationNumber(ksp, &its);
331: PCMPIIterations[size - 1] += its;
333: /* gather solution */
334: VecGetArrayRead(ksp->vec_sol, &x);
335: if (pc) VecGetArray(X, &sx);
336: MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm);
337: if (pc) VecRestoreArray(X, &sx);
338: VecRestoreArrayRead(ksp->vec_sol, &x);
339: return 0;
340: }
342: static PetscErrorCode PCMPIDestroy(PC pc)
343: {
344: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
345: KSP ksp;
346: MPI_Comm comm = PC_MPI_COMM_WORLD;
348: MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
349: if (!ksp) return 0;
350: KSPDestroy(&ksp);
351: return 0;
352: }
354: /*@C
355: PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
356: parallel `KSP` solves and management of parallel `KSP` objects.
358: Logically collective on all MPI ranks except 0
360: Options Database Keys:
361: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
362: - -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
364: Note:
365: This is normally started automatically in `PetscInitialize()` when the option is provided
367: Developer Notes:
368: When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
369: written with `PETSC_COMM_WORLD` to run correctly on that single rank, while all the other ranks
370: (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.
372: Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
374: Level: developer
376: .seealso: `PCMPIServerEnd()`, `PCMPI`
377: @*/
378: PetscErrorCode PCMPIServerBegin(void)
379: {
380: PetscMPIInt rank;
382: PetscInfo(NULL, "Starting MPI Linear Solver Server\n");
383: if (PetscDefined(USE_SINGLE_LIBRARY)) {
384: VecInitializePackage();
385: MatInitializePackage();
386: DMInitializePackage();
387: PCInitializePackage();
388: KSPInitializePackage();
389: SNESInitializePackage();
390: TSInitializePackage();
391: TaoInitializePackage();
392: }
394: MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank);
395: if (rank == 0) {
396: PETSC_COMM_WORLD = PETSC_COMM_SELF;
397: return 0;
398: }
400: while (PETSC_TRUE) {
401: PCMPICommand request = PCMPI_CREATE;
402: MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD);
403: switch (request) {
404: case PCMPI_CREATE:
405: PCMPICreate(NULL);
406: break;
407: case PCMPI_SET_MAT:
408: PCMPISetMat(NULL);
409: break;
410: case PCMPI_UPDATE_MAT_VALUES:
411: PCMPIUpdateMatValues(NULL);
412: break;
413: case PCMPI_VIEW:
414: // PCMPIView(NULL);
415: break;
416: case PCMPI_SOLVE:
417: PCMPISolve(NULL, NULL, NULL);
418: break;
419: case PCMPI_DESTROY:
420: PCMPIDestroy(NULL);
421: break;
422: case PCMPI_EXIT:
423: PetscFinalize();
424: exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
425: break;
426: default:
427: break;
428: }
429: }
430: return 0;
431: }
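/*
   Editorial note: on ranks other than 0 this routine does not return until a PCMPI_EXIT command is
   received, at which point it calls PetscFinalize() and exit(0); everything in the user's main() after
   PetscInitialize() therefore executes only on rank 0.
*/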
433: /*@C
434: PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
435: parallel `KSP` solves and management of parallel `KSP` objects.
437: Logically collective on all MPI ranks except 0
439: Note:
440: This is normally ended automatically in `PetscFinalize()` when the option is provided
442: Level: developer
444: .seealso: `PCMPIServerBegin()`, `PCMPI`
445: @*/
446: PetscErrorCode PCMPIServerEnd(void)
447: {
448: PCMPICommand request = PCMPI_EXIT;
450: if (PetscGlobalRank == 0) {
451: PetscViewer viewer = NULL;
452: PetscViewerFormat format;
454: MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD);
455: PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
456: PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
457: PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL);
458: PetscOptionsEnd();
459: if (viewer) {
460: PetscBool isascii;
462: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
463: if (isascii) {
464: PetscMPIInt size;
465: PetscMPIInt i;
467: MPI_Comm_size(PETSC_COMM_WORLD, &size);
468: PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n");
469: PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n");
470: if (PCMPIKSPCountsSeq) {
471: PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq);
472: }
473: for (i = 0; i < size; i++) {
474: if (PCMPIKSPCounts[i]) {
475: PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]);
476: }
477: }
478: }
479: PetscViewerDestroy(&viewer);
480: }
481: }
482: PCMPICommsDestroy();
483: return 0;
484: }
486: /*
487: This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
488: because, for example, the problem is small. This version is more efficient because it does not require copying any data
489: */
490: static PetscErrorCode PCSetUp_Seq(PC pc)
491: {
492: PC_MPI *km = (PC_MPI *)pc->data;
493: Mat sA;
494: const char *prefix;
496: PCGetOperators(pc, NULL, &sA);
497: PCGetOptionsPrefix(pc, &prefix);
498: KSPCreate(PETSC_COMM_SELF, &km->ksps[0]);
499: KSPSetOptionsPrefix(km->ksps[0], prefix);
500: KSPAppendOptionsPrefix(km->ksps[0], "mpi_");
501: KSPSetOperators(km->ksps[0], sA, sA);
502: KSPSetFromOptions(km->ksps[0]);
503: KSPSetUp(km->ksps[0]);
504: PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n");
505: PCMPIKSPCountsSeq++;
506: return 0;
507: }
509: static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
510: {
511: PC_MPI *km = (PC_MPI *)pc->data;
512: PetscInt its, n;
513: Mat A;
515: KSPSolve(km->ksps[0], b, x);
516: KSPGetIterationNumber(km->ksps[0], &its);
517: PCMPISolveCountsSeq++;
518: PCMPIIterationsSeq += its;
519: KSPGetOperators(km->ksps[0], NULL, &A);
520: MatGetSize(A, &n, NULL);
521: PCMPISizesSeq += n;
522: return 0;
523: }
525: static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
526: {
527: PC_MPI *km = (PC_MPI *)pc->data;
528: const char *prefix;
530: PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n");
531: PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank);
532: PCGetOptionsPrefix(pc, &prefix);
533: if (prefix) PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix);
534: else PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n");
535: PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n");
536: return 0;
537: }
539: static PetscErrorCode PCDestroy_Seq(PC pc)
540: {
541: PC_MPI *km = (PC_MPI *)pc->data;
543: KSPDestroy(&km->ksps[0]);
544: PetscFree(pc->data);
545: return 0;
546: }
548: /*
549: PCSetUp_MPI - Triggers the creation of the MPI parallel PC and copies parts of the matrix and
550: right-hand side to the parallel PC
551: */
552: static PetscErrorCode PCSetUp_MPI(PC pc)
553: {
554: PC_MPI *km = (PC_MPI *)pc->data;
555: PetscMPIInt rank, size;
556: PCMPICommand request;
557: PetscBool newmatrix = PETSC_FALSE;
559: MPI_Comm_rank(MPI_COMM_WORLD, &rank);
561: MPI_Comm_size(MPI_COMM_WORLD, &size);
563: if (!pc->setupcalled) {
564: if (!km->alwaysuseserver) {
565: PetscInt n;
566: Mat sA;
567: /* short circuit for small systems */
568: PCGetOperators(pc, &sA, &sA);
569: MatGetSize(sA, &n, NULL);
570: if (n < 2 * km->mincntperrank - 1 || size == 1) {
571: pc->ops->setup = NULL;
572: pc->ops->apply = PCApply_Seq;
573: pc->ops->destroy = PCDestroy_Seq;
574: pc->ops->view = PCView_Seq;
575: PCSetUp_Seq(pc);
576: return 0;
577: }
578: }
580: request = PCMPI_CREATE;
581: MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
582: PCMPICreate(pc);
583: newmatrix = PETSC_TRUE;
584: }
585: if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
587: if (newmatrix) {
588: PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n");
589: request = PCMPI_SET_MAT;
590: MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
591: PCMPISetMat(pc);
592: } else {
593: PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n");
594: request = PCMPI_UPDATE_MAT_VALUES;
595: MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
596: PCMPIUpdateMatValues(pc);
597: }
598: return 0;
599: }
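/*
   Editorial note: with the default -pc_mpi_minimum_count_per_rank of 10000, the short circuit above
   (n < 2*mincntperrank - 1, i.e. fewer than 19999 rows, or a single-rank run) keeps the system on rank 0
   and installs the PCApply_Seq/PCView_Seq/PCDestroy_Seq callbacks, so no data is ever copied to other
   ranks; -pc_mpi_always_use_server forces the full server path even in that case.
*/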
601: static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
602: {
603: PCMPICommand request = PCMPI_SOLVE;
605: MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
606: PCMPISolve(pc, b, x);
607: return 0;
608: }
610: PetscErrorCode PCDestroy_MPI(PC pc)
611: {
612: PCMPICommand request = PCMPI_DESTROY;
614: MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
615: PCMPIDestroy(pc);
616: PetscFree(pc->data);
617: return 0;
618: }
620: /*
621: PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
622: */
623: PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
624: {
625: PC_MPI *km = (PC_MPI *)pc->data;
626: MPI_Comm comm;
627: PetscMPIInt size;
628: const char *prefix;
630: PetscObjectGetComm((PetscObject)km->ksps[0], &comm);
631: MPI_Comm_size(comm, &size);
632: PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size);
633: PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank);
634: PCGetOptionsPrefix(pc, &prefix);
635: if (prefix) PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters ***\n", prefix);
636: else PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters ***\n");
637: PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to see statistics on all the solves ***\n");
638: return 0;
639: }
641: PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
642: {
643: PC_MPI *km = (PC_MPI *)pc->data;
645: PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
646: PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL);
647: PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL);
648: PetscOptionsHeadEnd();
649: return 0;
650: }
652: /*MC
653: PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
655: Level: beginner
657: Options Database Keys:
658: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
659: . -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
660: . -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
661: - -pc_mpi_always_use_server - use the server solver code even if the particular system would be solved on a single process; this option is only for debugging and testing purposes
663: Notes:
664: The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_
666: The MPI linear solver server is not intended to scale user code to extremely large numbers of MPI ranks, but it should give reasonable speedup for
667: roughly 4 to 8 MPI ranks, depending on the linear system being solved, the solver algorithm, and the hardware.
669: It can be particularly useful for user OpenMP code or potentially user GPU code.
671: When the program is running with a single MPI rank then this directly uses the provided matrix and right-hand side (still in a `KSP` with the options prefix -mpi_)
672: and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.
674: The solver options for `KSP` and `PC` must be controlled via the options database, calls to set options directly on the user level `KSP` and `PC` have no effect
675: because they are not the actual solver objects.
677: When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
678: stages will be confusing since the event times are only recorded on the 0th MPI rank, so the percentage of time reported for those events will be misleading.
680: .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
681: M*/
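/*
   Example (editorial addition): since the server-side KSP and PC can only be configured through the
   options database with the -mpi_ prefix (see the Notes above), a typical invocation might look like

       mpiexec -n 8 ./app -mpi_linear_solver_server -pc_type mpi \
               -mpi_ksp_type gmres -mpi_ksp_rtol 1e-8 -mpi_pc_type gamg \
               -mpi_ksp_view -mpi_linear_solver_server_view

   where selecting this PC with -pc_type mpi on the user-level (sequential) KSP is an assumption;
   -mpi_ksp_view and -mpi_linear_solver_server_view are the viewing options referenced by PCView_MPI().
*/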
682: PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
683: {
684: PC_MPI *km;
686: PetscNew(&km);
687: pc->data = (void *)km;
689: km->mincntperrank = 10000;
691: pc->ops->setup = PCSetUp_MPI;
692: pc->ops->apply = PCApply_MPI;
693: pc->ops->destroy = PCDestroy_MPI;
694: pc->ops->view = PCView_MPI;
695: pc->ops->setfromoptions = PCSetFromOptions_MPI;
696: return 0;
697: }