Actual source code: pbjacobi.c


  2: /*
  3:    Include files needed for the PBJacobi preconditioner:
  4:      pcimpl.h - private include file intended for use by all preconditioners
  5: */

  7: #include <petsc/private/pcimpl.h>

  9: /*
 10:    Private context (data structure) for the PBJacobi preconditioner.
 11: */
 12: typedef struct {
 13:   const MatScalar *diag;
 14:   PetscInt         bs, mbs;
 15: } PC_PBJacobi;

 17: static PetscErrorCode PCApply_PBJacobi_1(PC pc, Vec x, Vec y)
 18: {
 19:   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
 20:   PetscInt           i, m = jac->mbs;
 21:   const MatScalar   *diag = jac->diag;
 22:   const PetscScalar *xx;
 23:   PetscScalar       *yy;

 25:   VecGetArrayRead(x, &xx);
 26:   VecGetArray(y, &yy);
 27:   for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
 28:   VecRestoreArrayRead(x, &xx);
 29:   VecRestoreArray(y, &yy);
 30:   PetscLogFlops(m);
 31:   return 0;
 32: }

 34: static PetscErrorCode PCApply_PBJacobi_2(PC pc, Vec x, Vec y)
 35: {
 36:   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
 37:   PetscInt           i, m = jac->mbs;
 38:   const MatScalar   *diag = jac->diag;
 39:   PetscScalar        x0, x1, *yy;
 40:   const PetscScalar *xx;

 42:   VecGetArrayRead(x, &xx);
 43:   VecGetArray(y, &yy);
 44:   for (i = 0; i < m; i++) {
 45:     x0            = xx[2 * i];
 46:     x1            = xx[2 * i + 1];
 47:     yy[2 * i]     = diag[0] * x0 + diag[2] * x1;
 48:     yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1;
 49:     diag += 4;
 50:   }
 51:   VecRestoreArrayRead(x, &xx);
 52:   VecRestoreArray(y, &yy);
 53:   PetscLogFlops(6.0 * m);
 54:   return 0;
 55: }
 56: static PetscErrorCode PCApply_PBJacobi_3(PC pc, Vec x, Vec y)
 57: {
 58:   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
 59:   PetscInt           i, m = jac->mbs;
 60:   const MatScalar   *diag = jac->diag;
 61:   PetscScalar        x0, x1, x2, *yy;
 62:   const PetscScalar *xx;

 64:   VecGetArrayRead(x, &xx);
 65:   VecGetArray(y, &yy);
 66:   for (i = 0; i < m; i++) {
 67:     x0 = xx[3 * i];
 68:     x1 = xx[3 * i + 1];
 69:     x2 = xx[3 * i + 2];

 71:     yy[3 * i]     = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
 72:     yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
 73:     yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
 74:     diag += 9;
 75:   }
 76:   VecRestoreArrayRead(x, &xx);
 77:   VecRestoreArray(y, &yy);
 78:   PetscLogFlops(15.0 * m);
 79:   return 0;
 80: }
 81: static PetscErrorCode PCApply_PBJacobi_4(PC pc, Vec x, Vec y)
 82: {
 83:   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
 84:   PetscInt           i, m = jac->mbs;
 85:   const MatScalar   *diag = jac->diag;
 86:   PetscScalar        x0, x1, x2, x3, *yy;
 87:   const PetscScalar *xx;

 89:   VecGetArrayRead(x, &xx);
 90:   VecGetArray(y, &yy);
 91:   for (i = 0; i < m; i++) {
 92:     x0 = xx[4 * i];
 93:     x1 = xx[4 * i + 1];
 94:     x2 = xx[4 * i + 2];
 95:     x3 = xx[4 * i + 3];

 97:     yy[4 * i]     = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
 98:     yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
 99:     yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
100:     yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
101:     diag += 16;
102:   }
103:   VecRestoreArrayRead(x, &xx);
104:   VecRestoreArray(y, &yy);
105:   PetscLogFlops(28.0 * m);
106:   return 0;
107: }
108: static PetscErrorCode PCApply_PBJacobi_5(PC pc, Vec x, Vec y)
109: {
110:   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
111:   PetscInt           i, m = jac->mbs;
112:   const MatScalar   *diag = jac->diag;
113:   PetscScalar        x0, x1, x2, x3, x4, *yy;
114:   const PetscScalar *xx;

116:   VecGetArrayRead(x, &xx);
117:   VecGetArray(y, &yy);
118:   for (i = 0; i < m; i++) {
119:     x0 = xx[5 * i];
120:     x1 = xx[5 * i + 1];
121:     x2 = xx[5 * i + 2];
122:     x3 = xx[5 * i + 3];
123:     x4 = xx[5 * i + 4];

125:     yy[5 * i]     = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
126:     yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
127:     yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
128:     yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
129:     yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
130:     diag += 25;
131:   }
132:   VecRestoreArrayRead(x, &xx);
133:   VecRestoreArray(y, &yy);
134:   PetscLogFlops(45.0 * m);
135:   return 0;
136: }
137: static PetscErrorCode PCApply_PBJacobi_6(PC pc, Vec x, Vec y)
138: {
139:   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
140:   PetscInt           i, m = jac->mbs;
141:   const MatScalar   *diag = jac->diag;
142:   PetscScalar        x0, x1, x2, x3, x4, x5, *yy;
143:   const PetscScalar *xx;

145:   VecGetArrayRead(x, &xx);
146:   VecGetArray(y, &yy);
147:   for (i = 0; i < m; i++) {
148:     x0 = xx[6 * i];
149:     x1 = xx[6 * i + 1];
150:     x2 = xx[6 * i + 2];
151:     x3 = xx[6 * i + 3];
152:     x4 = xx[6 * i + 4];
153:     x5 = xx[6 * i + 5];

155:     yy[6 * i]     = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
156:     yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
157:     yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
158:     yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
159:     yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
160:     yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
161:     diag += 36;
162:   }
163:   VecRestoreArrayRead(x, &xx);
164:   VecRestoreArray(y, &yy);
165:   PetscLogFlops(66.0 * m);
166:   return 0;
167: }
168: static PetscErrorCode PCApply_PBJacobi_7(PC pc, Vec x, Vec y)
169: {
170:   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
171:   PetscInt           i, m = jac->mbs;
172:   const MatScalar   *diag = jac->diag;
173:   PetscScalar        x0, x1, x2, x3, x4, x5, x6, *yy;
174:   const PetscScalar *xx;

176:   VecGetArrayRead(x, &xx);
177:   VecGetArray(y, &yy);
178:   for (i = 0; i < m; i++) {
179:     x0 = xx[7 * i];
180:     x1 = xx[7 * i + 1];
181:     x2 = xx[7 * i + 2];
182:     x3 = xx[7 * i + 3];
183:     x4 = xx[7 * i + 4];
184:     x5 = xx[7 * i + 5];
185:     x6 = xx[7 * i + 6];

187:     yy[7 * i]     = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
188:     yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
189:     yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
190:     yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
191:     yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
192:     yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
193:     yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
194:     diag += 49;
195:   }
196:   VecRestoreArrayRead(x, &xx);
197:   VecRestoreArray(y, &yy);
198:   PetscLogFlops(91.0 * m); /* 2*bs2 - bs */
199:   return 0;
200: }
201: static PetscErrorCode PCApply_PBJacobi_N(PC pc, Vec x, Vec y)
202: {
203:   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
204:   PetscInt           i, ib, jb;
205:   const PetscInt     m    = jac->mbs;
206:   const PetscInt     bs   = jac->bs;
207:   const MatScalar   *diag = jac->diag;
208:   PetscScalar       *yy;
209:   const PetscScalar *xx;

211:   VecGetArrayRead(x, &xx);
212:   VecGetArray(y, &yy);
213:   for (i = 0; i < m; i++) {
214:     for (ib = 0; ib < bs; ib++) {
215:       PetscScalar rowsum = 0;
216:       for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb];
217:       yy[bs * i + ib] = rowsum;
218:     }
219:     diag += bs * bs;
220:   }
221:   VecRestoreArrayRead(x, &xx);
222:   VecRestoreArray(y, &yy);
223:   PetscLogFlops((2.0 * bs * bs - bs) * m); /* 2*bs2 - bs */
224:   return 0;
225: }

227: static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc, Vec x, Vec y)
228: {
229:   PC_PBJacobi       *jac = (PC_PBJacobi *)pc->data;
230:   PetscInt           i, j, k, m = jac->mbs, bs = jac->bs;
231:   const MatScalar   *diag = jac->diag;
232:   const PetscScalar *xx;
233:   PetscScalar       *yy;

235:   VecGetArrayRead(x, &xx);
236:   VecGetArray(y, &yy);
237:   for (i = 0; i < m; i++) {
238:     for (j = 0; j < bs; j++) yy[i * bs + j] = 0.;
239:     for (j = 0; j < bs; j++) {
240:       for (k = 0; k < bs; k++) yy[i * bs + k] += diag[k * bs + j] * xx[i * bs + j];
241:     }
242:     diag += bs * bs;
243:   }
244:   VecRestoreArrayRead(x, &xx);
245:   VecRestoreArray(y, &yy);
246:   PetscLogFlops(m * bs * (2 * bs - 1));
247:   return 0;
248: }

250: static PetscErrorCode PCSetUp_PBJacobi(PC pc)
251: {
252:   PC_PBJacobi   *jac = (PC_PBJacobi *)pc->data;
253:   Mat            A   = pc->pmat;
254:   MatFactorError err;
255:   PetscInt       nlocal;

257:   MatInvertBlockDiagonal(A, &jac->diag);
258:   MatFactorGetError(A, &err);
259:   if (err) pc->failedreason = (PCFailedReason)err;

261:   MatGetBlockSize(A, &jac->bs);
262:   MatGetLocalSize(A, &nlocal, NULL);
263:   jac->mbs = nlocal / jac->bs;
264:   switch (jac->bs) {
265:   case 1:
266:     pc->ops->apply = PCApply_PBJacobi_1;
267:     break;
268:   case 2:
269:     pc->ops->apply = PCApply_PBJacobi_2;
270:     break;
271:   case 3:
272:     pc->ops->apply = PCApply_PBJacobi_3;
273:     break;
274:   case 4:
275:     pc->ops->apply = PCApply_PBJacobi_4;
276:     break;
277:   case 5:
278:     pc->ops->apply = PCApply_PBJacobi_5;
279:     break;
280:   case 6:
281:     pc->ops->apply = PCApply_PBJacobi_6;
282:     break;
283:   case 7:
284:     pc->ops->apply = PCApply_PBJacobi_7;
285:     break;
286:   default:
287:     pc->ops->apply = PCApply_PBJacobi_N;
288:     break;
289:   }
290:   pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N;
291:   return 0;
292: }

294: static PetscErrorCode PCDestroy_PBJacobi(PC pc)
295: {
296:   /*
297:       Free the private data structure that was hanging off the PC
298:   */
299:   PetscFree(pc->data);
300:   return 0;
301: }

303: static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer)
304: {
305:   PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
306:   PetscBool    iascii;

308:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
309:   if (iascii) PetscViewerASCIIPrintf(viewer, "  point-block size %" PetscInt_FMT "\n", jac->bs);
310:   return 0;
311: }

313: /*MC
314:      PCPBJACOBI - Point block Jacobi preconditioner

316:    Notes:
317:     See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks

319:    This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix

321:    Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
322:    is detected a PETSc error is generated.

324:    Developer Notes:
325:      This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
326:      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
327:      terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing
328:      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.

330:      Perhaps should provide an option that allows generation of a valid preconditioner
331:      even if a block is singular as the `PCJACOBI` does.

333:    Level: beginner

335: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI`
336: M*/

338: PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
339: {
340:   PC_PBJacobi *jac;

342:   /*
343:      Creates the private data structure for this preconditioner and
344:      attach it to the PC object.
345:   */
346:   PetscNew(&jac);
347:   pc->data = (void *)jac;

349:   /*
350:      Initialize the pointers to vectors to ZERO; these will be used to store
351:      diagonal entries of the matrix for fast preconditioner application.
352:   */
353:   jac->diag = NULL;

355:   /*
356:       Set the pointers for the functions that are provided above.
357:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
358:       are called, they will automatically call these functions.  Note we
359:       choose not to provide a couple of these functions since they are
360:       not needed.
361:   */
362:   pc->ops->apply               = NULL; /*set depending on the block size */
363:   pc->ops->applytranspose      = NULL;
364:   pc->ops->setup               = PCSetUp_PBJacobi;
365:   pc->ops->destroy             = PCDestroy_PBJacobi;
366:   pc->ops->setfromoptions      = NULL;
367:   pc->ops->view                = PCView_PBJacobi;
368:   pc->ops->applyrichardson     = NULL;
369:   pc->ops->applysymmetricleft  = NULL;
370:   pc->ops->applysymmetricright = NULL;
371:   return 0;
372: }