Actual source code: redistribute.c
/*
  This file defines a preconditioner that redistributes the rows of the matrix across the MPI processes,
  dropping rows (and the corresponding columns) that contain only a diagonal entry, and solves the reduced
  system with an inner KSP.
*/
#include <petsc/private/pcimpl.h>
#include <petscksp.h>

typedef struct {
  KSP          ksp;
  Vec          x, b;
  VecScatter   scatter;
  IS           is;
  PetscInt     dcnt, *drows; /* these are the local rows that have only diagonal entry */
  PetscScalar *diag;
  Vec          work;
  PetscBool    zerodiag;
} PC_Redistribute;
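
/*
  How the pieces fit together: red->is selects (in the original global numbering) the non-diagonal rows this
  process owns after load balancing, red->scatter moves those entries between the original vector layout and
  the redistributed vectors red->b/red->x solved with the inner red->ksp, while red->drows/red->diag apply
  the inverse of the diagonal directly to the locally eliminated diagonal-only rows in PCApply_Redistribute().
*/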

static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  PetscBool        iascii, isstring;
  PetscInt         ncnt, N;

  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring);
  if (iascii) {
    MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));
    MatGetSize(pc->pmat, &N, NULL);
    PetscViewerASCIIPrintf(viewer, " Number rows eliminated %" PetscInt_FMT " Percentage rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N)));
    PetscViewerASCIIPrintf(viewer, " Redistribute preconditioner: \n");
    KSPView(red->ksp, viewer);
  } else if (isstring) {
    PetscViewerStringSPrintf(viewer, " Redistribute preconditioner");
    KSPView(red->ksp, viewer);
  }
  return 0;
}

static PetscErrorCode PCSetUp_Redistribute(PC pc)
{
  PC_Redistribute         *red = (PC_Redistribute *)pc->data;
  MPI_Comm                 comm;
  PetscInt                 rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows;
  PetscLayout              map, nmap;
  PetscMPIInt              size, tag, n;
  PETSC_UNUSED PetscMPIInt imdex;
  PetscInt                *source = NULL;
  PetscMPIInt             *sizes = NULL, nrecvs;
  PetscInt                 j, nsends;
  PetscInt                *owner = NULL, *starts = NULL, count, slen;
  PetscInt                *rvalues, *svalues, recvtotal;
  PetscMPIInt             *onodes1, *olengths1;
  MPI_Request             *send_waits = NULL, *recv_waits = NULL;
  MPI_Status               recv_status, *send_status;
  Vec                      tvec, diag;
  Mat                      tmat;
  const PetscScalar       *d, *values;
  const PetscInt          *cols;

  if (pc->setupcalled) {
    KSPGetOperators(red->ksp, NULL, &tmat);
    MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat);
    KSPSetOperators(red->ksp, tmat, tmat);
  } else {
    PetscInt NN;

    PetscObjectGetComm((PetscObject)pc, &comm);
    MPI_Comm_size(comm, &size);
    PetscObjectGetNewTag((PetscObject)pc, &tag);

    /* count non-diagonal rows on process */
    MatGetOwnershipRange(pc->mat, &rstart, &rend);
    cnt = 0;
    for (i = rstart; i < rend; i++) {
      MatGetRow(pc->mat, i, &nz, &cols, &values);
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          cnt++;
          break;
        }
      }
      MatRestoreRow(pc->mat, i, &nz, &cols, &values);
    }
    PetscMalloc1(cnt, &rows);
    PetscMalloc1(rend - rstart - cnt, &drows);

    /* list non-diagonal rows on process */
    cnt  = 0;
    dcnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscBool diagonly = PETSC_TRUE;
      MatGetRow(pc->mat, i, &nz, &cols, &values);
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          diagonly = PETSC_FALSE;
          break;
        }
      }
      if (!diagonly) rows[cnt++] = i;
      else drows[dcnt++] = i - rstart;
      MatRestoreRow(pc->mat, i, &nz, &cols, &values);
    }

    /* create PetscLayout for non-diagonal rows on each process */
    PetscLayoutCreate(comm, &map);
    PetscLayoutSetLocalSize(map, cnt);
    PetscLayoutSetBlockSize(map, 1);
    PetscLayoutSetUp(map);
    rstart = map->rstart;
    rend   = map->rend;

    /* create PetscLayout for load-balanced non-diagonal rows on each process */
    PetscLayoutCreate(comm, &nmap);
    MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm);
    PetscLayoutSetSize(nmap, ncnt);
    PetscLayoutSetBlockSize(nmap, 1);
    PetscLayoutSetUp(nmap);

    MatGetSize(pc->pmat, &NN, NULL);
    PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(100.0 * ((PetscReal)(NN - ncnt)) / ((PetscReal)NN)));

    if (size > 1) {
      /* the following block of code assumes MPI can send messages to self, which is not supported by MPI-uni, hence the size 1 case is handled separately below */
      /*
        this code is taken from VecScatterCreate_PtoS()
        Determines what rows need to be moved where to
        load balance the non-diagonal rows
      */
      /* count number of contributors to each processor */
      PetscMalloc2(size, &sizes, cnt, &owner);
      PetscArrayzero(sizes, size);
      j      = 0;
      nsends = 0;
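      /* for each locally held non-diagonal row (global index i in the unbalanced layout map), find the
         process j whose slice of the balanced layout nmap contains it; owner[] records the destination
         and sizes[j] counts how many rows will be sent to process j */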
      for (i = rstart; i < rend; i++) {
        if (i < nmap->range[j]) j = 0;
        for (; j < size; j++) {
          if (i < nmap->range[j + 1]) {
            if (!sizes[j]++) nsends++;
            owner[i - rstart] = j;
            break;
          }
        }
      }
      /* inform other processors of number of messages and max length */
      PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs);
      PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1);
      PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1);
      recvtotal = 0;
      for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i];

      /* post receives: rvalues - rows I will own; count - running offset into rvalues */
      PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits);
      count = 0;
      for (i = 0; i < nrecvs; i++) {
        MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i);
        count += olengths1[i];
      }

      /* do sends:
         1) starts[i] gives the starting index in svalues for stuff going to
            the ith processor
      */
      PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts);
      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i];
      for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart;
      red->drows = drows;
      red->dcnt  = dcnt;
      PetscFree(rows);

      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      count = 0;
      for (i = 0; i < size; i++) {
        if (sizes[i]) MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++);
      }

      /* wait on receives */
      count = nrecvs;
      slen  = 0;
      while (count) {
        MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status);
        /* unpack receives into our local space */
        MPI_Get_count(&recv_status, MPIU_INT, &n);
        slen += n;
        count--;
      }

      ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is);
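      /* red->is lists, in the original matrix's global numbering, the non-diagonal rows this process
         owns after load balancing; it is used below to build the vector scatter and the submatrix */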

      /* free all work space */
      PetscFree(olengths1);
      PetscFree(onodes1);
      PetscFree3(rvalues, source, recv_waits);
      PetscFree2(sizes, owner);
      if (nsends) { /* wait on sends */
        PetscMalloc1(nsends, &send_status);
        MPI_Waitall(nsends, send_waits, send_status);
        PetscFree(send_status);
      }
      PetscFree3(svalues, send_waits, starts);
    } else {
      ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is);
      red->drows = drows;
      red->dcnt  = dcnt;
      slen       = cnt;
    }
    PetscLayoutDestroy(&map);
    PetscLayoutDestroy(&nmap);
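
    /* create the redistributed work vectors, the scatter from the original layout to the redistributed
       layout, and the reduced operator used by the inner KSP */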
    VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b);
    VecDuplicate(red->b, &red->x);
    MatCreateVecs(pc->pmat, &tvec, NULL);
    VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter);
    VecDestroy(&tvec);
    MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat);
    KSPSetOperators(red->ksp, tmat, tmat);
    MatDestroy(&tmat);
  }

  /* get diagonal portion of matrix */
  PetscFree(red->diag);
  PetscMalloc1(red->dcnt, &red->diag);
  MatCreateVecs(pc->pmat, &diag, NULL);
  MatGetDiagonal(pc->pmat, diag);
  VecGetArrayRead(diag, &d);
  for (i = 0; i < red->dcnt; i++) {
    if (d[red->drows[i]] != 0) red->diag[i] = 1.0 / d[red->drows[i]];
    else {
      red->zerodiag = PETSC_TRUE;
      red->diag[i]  = 0.0;
    }
  }
  VecRestoreArrayRead(diag, &d);
  VecDestroy(&diag);
  KSPSetUp(red->ksp);
  return 0;
}

static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x)
{
  PC_Redistribute   *red   = (PC_Redistribute *)pc->data;
  PetscInt           dcnt  = red->dcnt, i;
  const PetscInt    *drows = red->drows;
  PetscScalar       *xwork;
  const PetscScalar *bwork, *diag = red->diag;

  if (!red->work) VecDuplicate(b, &red->work);
  /* compute the rows of solution that have diagonal entries only */
  VecSet(x, 0.0); /* x = diag(A)^{-1} b */
  VecGetArray(x, &xwork);
  VecGetArrayRead(b, &bwork);
  if (red->zerodiag) {
    for (i = 0; i < dcnt; i++) {
      if (diag[i] == 0.0 && bwork[drows[i]] != 0.0) {
        PetscInfo(pc, "Linear system is inconsistent, zero matrix row but nonzero right hand side\n");
        VecSetInf(x);
        pc->failedreasonrank = PC_INCONSISTENT_RHS;
      }
    }
  }
  for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]];
  PetscLogFlops(dcnt);
  VecRestoreArray(x, &xwork);
  VecRestoreArrayRead(b, &bwork);
  /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
  MatMult(pc->pmat, x, red->work);
  VecAYPX(red->work, -1.0, b); /* red->work = b - A x */

  VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD);
  KSPSolve(red->ksp, red->b, red->x);
  KSPCheckSolve(red->ksp, pc, red->x);
  VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE);
  VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE);
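  /* x now contains the diagonal-only part computed above plus the scattered solution of the reduced system */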
  return 0;
}

static PetscErrorCode PCDestroy_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  VecScatterDestroy(&red->scatter);
  ISDestroy(&red->is);
  VecDestroy(&red->b);
  VecDestroy(&red->x);
  KSPDestroy(&red->ksp);
  VecDestroy(&red->work);
  PetscFree(red->drows);
  PetscFree(red->diag);
  PetscFree(pc->data);
  return 0;
}

static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  KSPSetFromOptions(red->ksp);
  return 0;
}

/*@
  PCRedistributeGetKSP - Gets the `KSP` created by the `PCREDISTRIBUTE`

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameter:
. innerksp - the inner `KSP`

  Level: advanced

.seealso: `KSP`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  *innerksp = red->ksp;
  return 0;
}

/*MC
   PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that have only a diagonal entry, and then
   applies a `KSP` to that new smaller matrix

   Level: intermediate

   Notes:
   Options for the inner (redistribute) `KSP` and `PC` are set with the options database prefix -redistribute_

   Usually run this with `-ksp_type preonly`

   If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, `-ksp_type preonly
   -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc` to take advantage of the symmetry.

   This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and the redistributed matrix is the same. Rows are moved
   between MPI processes inside the preconditioner to balance the number of rows on each process.

   Developer Note:
   Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()`
M*/
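
/*
  A minimal usage sketch, for illustration only: select PCREDISTRIBUTE on a KSP and retrieve the inner
  KSP with PCRedistributeGetKSP(). It assumes a matrix A and vectors b, x that were assembled elsewhere;
  the inner solver can equally well be configured from the command line with the -redistribute_ prefix.

    KSP ksp, inner;
    PC  pc;

    KSPCreate(PETSC_COMM_WORLD, &ksp);
    KSPSetOperators(ksp, A, A);
    KSPSetType(ksp, KSPPREONLY);      // the outer solve is a single application of the preconditioner
    KSPGetPC(ksp, &pc);
    PCSetType(pc, PCREDISTRIBUTE);
    PCRedistributeGetKSP(pc, &inner); // tune the inner solve here or via -redistribute_ options
    KSPSetType(inner, KSPCG);
    KSPSetFromOptions(ksp);
    KSPSolve(ksp, b, x);
    KSPDestroy(&ksp);
*/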

PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
{
  PC_Redistribute *red;
  const char      *prefix;

  PetscNew(&red);
  pc->data = (void *)red;

  pc->ops->apply          = PCApply_Redistribute;
  pc->ops->applytranspose = NULL;
  pc->ops->setup          = PCSetUp_Redistribute;
  pc->ops->destroy        = PCDestroy_Redistribute;
  pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
  pc->ops->view           = PCView_Redistribute;

  KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp);
  KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure);
  PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1);
  PCGetOptionsPrefix(pc, &prefix);
  KSPSetOptionsPrefix(red->ksp, prefix);
  KSPAppendOptionsPrefix(red->ksp, "redistribute_");
  return 0;
}