Actual source code: redistribute.c


/*
  This file defines a preconditioner that removes the rows (and corresponding columns) that have only a diagonal entry,
  redistributes the remaining rows evenly across the processes for load balancing, and solves the resulting smaller system.
*/
#include <petsc/private/pcimpl.h>
#include <petscksp.h>

typedef struct {
  KSP          ksp;          /* inner solver applied to the redistributed system */
  Vec          x, b;         /* solution and right-hand side of the redistributed system */
  VecScatter   scatter;      /* moves values between the original layout and the redistributed layout */
  IS           is;           /* rows (in the original global numbering) kept in the redistributed system */
  PetscInt     dcnt, *drows; /* the local rows that have only a diagonal entry */
  PetscScalar *diag;         /* inverses of the diagonal entries of the eliminated rows (zero where the diagonal is zero) */
  Vec          work;         /* work vector in the original (non-redistributed) layout */
  PetscBool    zerodiag;     /* set if any eliminated row has a zero diagonal entry */
} PC_Redistribute;

static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  PetscBool        iascii, isstring;
  PetscInt         ncnt, N;

  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring);
  if (iascii) {
    MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));
    MatGetSize(pc->pmat, &N, NULL);
    PetscViewerASCIIPrintf(viewer, "    Number rows eliminated %" PetscInt_FMT " Percentage rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N)));
    PetscViewerASCIIPrintf(viewer, "  Redistribute preconditioner: \n");
    KSPView(red->ksp, viewer);
  } else if (isstring) {
    PetscViewerStringSPrintf(viewer, " Redistribute preconditioner");
    KSPView(red->ksp, viewer);
  }
  return 0;
}

static PetscErrorCode PCSetUp_Redistribute(PC pc)
{
  PC_Redistribute         *red = (PC_Redistribute *)pc->data;
  MPI_Comm                 comm;
  PetscInt                 rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows;
  PetscLayout              map, nmap;
  PetscMPIInt              size, tag, n;
  PETSC_UNUSED PetscMPIInt imdex;
  PetscInt                *source = NULL;
  PetscMPIInt             *sizes  = NULL, nrecvs;
  PetscInt                 j, nsends;
  PetscInt                *owner = NULL, *starts = NULL, count, slen;
  PetscInt                *rvalues, *svalues, recvtotal;
  PetscMPIInt             *onodes1, *olengths1;
  MPI_Request             *send_waits = NULL, *recv_waits = NULL;
  MPI_Status               recv_status, *send_status;
  Vec                      tvec, diag;
  Mat                      tmat;
  const PetscScalar       *d, *values;
  const PetscInt          *cols;

  if (pc->setupcalled) {
    KSPGetOperators(red->ksp, NULL, &tmat);
    MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat);
    KSPSetOperators(red->ksp, tmat, tmat);
  } else {
    PetscInt NN;

    PetscObjectGetComm((PetscObject)pc, &comm);
    MPI_Comm_size(comm, &size);
    PetscObjectGetNewTag((PetscObject)pc, &tag);

    /* count non-diagonal rows on process */
    MatGetOwnershipRange(pc->mat, &rstart, &rend);
    cnt = 0;
    for (i = rstart; i < rend; i++) {
      MatGetRow(pc->mat, i, &nz, &cols, &values);
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          cnt++;
          break;
        }
      }
      MatRestoreRow(pc->mat, i, &nz, &cols, &values);
    }
    PetscMalloc1(cnt, &rows);
    PetscMalloc1(rend - rstart - cnt, &drows);

    /* list non-diagonal rows on process */
    cnt  = 0;
    dcnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscBool diagonly = PETSC_TRUE;
      MatGetRow(pc->mat, i, &nz, &cols, &values);
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          diagonly = PETSC_FALSE;
          break;
        }
      }
      if (!diagonly) rows[cnt++] = i;
      else drows[dcnt++] = i - rstart;
      MatRestoreRow(pc->mat, i, &nz, &cols, &values);
    }

    /* create PetscLayout for non-diagonal rows on each process */
    PetscLayoutCreate(comm, &map);
    PetscLayoutSetLocalSize(map, cnt);
    PetscLayoutSetBlockSize(map, 1);
    PetscLayoutSetUp(map);
    rstart = map->rstart;
    rend   = map->rend;

    /* create PetscLayout for load-balanced non-diagonal rows on each process */
    PetscLayoutCreate(comm, &nmap);
    MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm);
    PetscLayoutSetSize(nmap, ncnt);
    PetscLayoutSetBlockSize(nmap, 1);
    PetscLayoutSetUp(nmap);

    MatGetSize(pc->pmat, &NN, NULL);
    PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(100.0 * ((PetscReal)(NN - ncnt)) / ((PetscReal)NN)));

    if (size > 1) {
      /* the following block of code assumes MPI can send messages to self, which is not supported for MPI-uni hence we need to handle the size 1 case as a special case */
      /*
       this code is taken from VecScatterCreate_PtoS()
       Determines what rows need to be moved where to
       load balance the non-diagonal rows
       */
      /* count number of contributors to each processor */
      PetscMalloc2(size, &sizes, cnt, &owner);
      PetscArrayzero(sizes, size);
      j      = 0;
      nsends = 0;
      for (i = rstart; i < rend; i++) {
        if (i < nmap->range[j]) j = 0;
        for (; j < size; j++) {
          if (i < nmap->range[j + 1]) {
            if (!sizes[j]++) nsends++;
            owner[i - rstart] = j;
            break;
          }
        }
      }
      /* inform other processors of number of messages and max length */
      PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs);
      PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1);
      PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1);
      recvtotal = 0;
      for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i];

      /* post receives: rvalues - rows this process will own */
      PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits);
      count = 0;
      for (i = 0; i < nrecvs; i++) {
        MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i);
        count += olengths1[i];
      }

      /* do sends:
       1) starts[i] gives the starting index in svalues for the entries going to
       the ith processor
       */
      PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts);
      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i];
      for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart;
      red->drows = drows;
      red->dcnt  = dcnt;
      PetscFree(rows);

      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      count = 0;
      for (i = 0; i < size; i++) {
        if (sizes[i]) MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++);
      }

      /* wait on receives */
      count = nrecvs;
      slen  = 0;
      while (count) {
        MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status);
        /* unpack receives into our local space */
        MPI_Get_count(&recv_status, MPIU_INT, &n);
        slen += n;
        count--;
      }
      ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is);

      /* free all work space */
      PetscFree(olengths1);
      PetscFree(onodes1);
      PetscFree3(rvalues, source, recv_waits);
      PetscFree2(sizes, owner);
      if (nsends) { /* wait on sends */
        PetscMalloc1(nsends, &send_status);
        MPI_Waitall(nsends, send_waits, send_status);
        PetscFree(send_status);
      }
      PetscFree3(svalues, send_waits, starts);
    } else {
      ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is);
      red->drows = drows;
      red->dcnt  = dcnt;
      slen       = cnt;
    }
    PetscLayoutDestroy(&map);
    PetscLayoutDestroy(&nmap);

    VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b);
    VecDuplicate(red->b, &red->x);
    MatCreateVecs(pc->pmat, &tvec, NULL);
    VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter);
    VecDestroy(&tvec);
    MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat);
    KSPSetOperators(red->ksp, tmat, tmat);
    MatDestroy(&tmat);
  }

  /* get diagonal portion of matrix */
  PetscFree(red->diag);
  PetscMalloc1(red->dcnt, &red->diag);
  MatCreateVecs(pc->pmat, &diag, NULL);
  MatGetDiagonal(pc->pmat, diag);
  VecGetArrayRead(diag, &d);
  for (i = 0; i < red->dcnt; i++) {
    if (d[red->drows[i]] != 0) red->diag[i] = 1.0 / d[red->drows[i]];
    else {
      red->zerodiag = PETSC_TRUE;
      red->diag[i]  = 0.0;
    }
  }
  VecRestoreArrayRead(diag, &d);
  VecDestroy(&diag);
  KSPSetUp(red->ksp);
  return 0;
}

static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x)
{
  PC_Redistribute   *red   = (PC_Redistribute *)pc->data;
  PetscInt           dcnt  = red->dcnt, i;
  const PetscInt    *drows = red->drows;
  PetscScalar       *xwork;
  const PetscScalar *bwork, *diag = red->diag;

  if (!red->work) VecDuplicate(b, &red->work);
  /* compute the rows of the solution that have only a diagonal entry: x = diag(A)^{-1} b on those rows */
  VecSet(x, 0.0);
  VecGetArray(x, &xwork);
  VecGetArrayRead(b, &bwork);
  if (red->zerodiag) {
    for (i = 0; i < dcnt; i++) {
      if (diag[i] == 0.0 && bwork[drows[i]] != 0.0) {
        PetscInfo(pc, "Linear system is inconsistent, zero matrix row but nonzero right hand side\n");
        VecSetInf(x);
        pc->failedreasonrank = PC_INCONSISTENT_RHS;
      }
    }
  }
  for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]];
  PetscLogFlops(dcnt);
  VecRestoreArray(x, &xwork); /* the array was obtained from x above, so it must be restored to x */
  VecRestoreArrayRead(b, &bwork);
  /* update the right-hand side for the reduced system with diagonal rows (and corresponding columns) removed */
  MatMult(pc->pmat, x, red->work);
  VecAYPX(red->work, -1.0, b); /* red->work = b - A x */

  VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD);
  KSPSolve(red->ksp, red->b, red->x);
  KSPCheckSolve(red->ksp, pc, red->x);
  VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE);
  VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE);
  return 0;
}

static PetscErrorCode PCDestroy_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  VecScatterDestroy(&red->scatter);
  ISDestroy(&red->is);
  VecDestroy(&red->b);
  VecDestroy(&red->x);
  KSPDestroy(&red->ksp);
  VecDestroy(&red->work);
  PetscFree(red->drows);
  PetscFree(red->diag);
  PetscFree(pc->data);
  return 0;
}

static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  KSPSetFromOptions(red->ksp);
  return 0;
}

/*@
   PCRedistributeGetKSP - Gets the inner `KSP` created by the `PCREDISTRIBUTE` preconditioner

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameter:
.  innerksp - the inner `KSP`

   Level: advanced
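
   Example Usage:
   A minimal sketch of customizing the inner solver, assuming `pc` already has type `PCREDISTRIBUTE`; the variable name `inner` and the choice of `KSPCG` are illustrative only:
.vb
   KSP inner;

   PCRedistributeGetKSP(pc, &inner);
   KSPSetType(inner, KSPCG); /* for example, solve the redistributed system with CG */
.ve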

.seealso: `KSP`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  *innerksp = red->ksp;
  return 0;
}

/*MC
     PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that have only a diagonal entry, and then
     applies a `KSP` to the smaller redistributed matrix

     Level: intermediate

     Notes:
     Options for the inner (redistributed) `KSP` and `PC` are set with the options database prefix -redistribute_

     Usually run this with `-ksp_type preonly` for the outer solver

     If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem, then you can use, for example, `-ksp_type preonly
     -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc` to take advantage of the symmetry.

     This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and the redistributed matrix is the same. Rows are moved
     between MPI processes inside the preconditioner only to balance the number of rows on each process.
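
     Example Usage:
     A minimal sketch of selecting this preconditioner in code; the names `ksp`, `A`, `b`, and `x` are illustrative and assumed to exist already. The inner solver is then controlled from the options database with the -redistribute_ prefix:
.vb
   PC pc;

   KSPSetOperators(ksp, A, A);
   KSPSetType(ksp, KSPPREONLY);
   KSPGetPC(ksp, &pc);
   PCSetType(pc, PCREDISTRIBUTE);
   KSPSetFromOptions(ksp); /* picks up, for example, -redistribute_ksp_type cg -redistribute_pc_type bjacobi */
   KSPSolve(ksp, b, x);
.ve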

     Developer Note:
     Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()`
M*/

PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
{
  PC_Redistribute *red;
  const char      *prefix;

  PetscNew(&red);
  pc->data = (void *)red;

  pc->ops->apply          = PCApply_Redistribute;
  pc->ops->applytranspose = NULL;
  pc->ops->setup          = PCSetUp_Redistribute;
  pc->ops->destroy        = PCDestroy_Redistribute;
  pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
  pc->ops->view           = PCView_Redistribute;

  KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp);
  KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure);
  PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1);
  PCGetOptionsPrefix(pc, &prefix);
  KSPSetOptionsPrefix(red->ksp, prefix);
  KSPAppendOptionsPrefix(red->ksp, "redistribute_");
  return 0;
}