Actual source code: pbjacobi.c
2: /*
3: Include files needed for the PBJacobi preconditioner:
4: pcimpl.h - private include file intended for use by all preconditioners
5: */
7: #include <petsc/private/pcimpl.h>
9: /*
10: Private context (data structure) for the PBJacobi preconditioner.
11: */
12: typedef struct {
13: const MatScalar *diag;
14: PetscInt bs, mbs;
15: } PC_PBJacobi;
17: static PetscErrorCode PCApply_PBJacobi_1(PC pc, Vec x, Vec y)
18: {
19: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
20: PetscInt i, m = jac->mbs;
21: const MatScalar *diag = jac->diag;
22: const PetscScalar *xx;
23: PetscScalar *yy;
25: VecGetArrayRead(x, &xx);
26: VecGetArray(y, &yy);
27: for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
28: VecRestoreArrayRead(x, &xx);
29: VecRestoreArray(y, &yy);
30: PetscLogFlops(m);
31: return 0;
32: }
34: static PetscErrorCode PCApply_PBJacobi_2(PC pc, Vec x, Vec y)
35: {
36: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
37: PetscInt i, m = jac->mbs;
38: const MatScalar *diag = jac->diag;
39: PetscScalar x0, x1, *yy;
40: const PetscScalar *xx;
42: VecGetArrayRead(x, &xx);
43: VecGetArray(y, &yy);
44: for (i = 0; i < m; i++) {
45: x0 = xx[2 * i];
46: x1 = xx[2 * i + 1];
47: yy[2 * i] = diag[0] * x0 + diag[2] * x1;
48: yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1;
49: diag += 4;
50: }
51: VecRestoreArrayRead(x, &xx);
52: VecRestoreArray(y, &yy);
53: PetscLogFlops(6.0 * m);
54: return 0;
55: }
56: static PetscErrorCode PCApply_PBJacobi_3(PC pc, Vec x, Vec y)
57: {
58: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
59: PetscInt i, m = jac->mbs;
60: const MatScalar *diag = jac->diag;
61: PetscScalar x0, x1, x2, *yy;
62: const PetscScalar *xx;
64: VecGetArrayRead(x, &xx);
65: VecGetArray(y, &yy);
66: for (i = 0; i < m; i++) {
67: x0 = xx[3 * i];
68: x1 = xx[3 * i + 1];
69: x2 = xx[3 * i + 2];
71: yy[3 * i] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
72: yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
73: yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
74: diag += 9;
75: }
76: VecRestoreArrayRead(x, &xx);
77: VecRestoreArray(y, &yy);
78: PetscLogFlops(15.0 * m);
79: return 0;
80: }
81: static PetscErrorCode PCApply_PBJacobi_4(PC pc, Vec x, Vec y)
82: {
83: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
84: PetscInt i, m = jac->mbs;
85: const MatScalar *diag = jac->diag;
86: PetscScalar x0, x1, x2, x3, *yy;
87: const PetscScalar *xx;
89: VecGetArrayRead(x, &xx);
90: VecGetArray(y, &yy);
91: for (i = 0; i < m; i++) {
92: x0 = xx[4 * i];
93: x1 = xx[4 * i + 1];
94: x2 = xx[4 * i + 2];
95: x3 = xx[4 * i + 3];
97: yy[4 * i] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
98: yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
99: yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
100: yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
101: diag += 16;
102: }
103: VecRestoreArrayRead(x, &xx);
104: VecRestoreArray(y, &yy);
105: PetscLogFlops(28.0 * m);
106: return 0;
107: }
108: static PetscErrorCode PCApply_PBJacobi_5(PC pc, Vec x, Vec y)
109: {
110: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
111: PetscInt i, m = jac->mbs;
112: const MatScalar *diag = jac->diag;
113: PetscScalar x0, x1, x2, x3, x4, *yy;
114: const PetscScalar *xx;
116: VecGetArrayRead(x, &xx);
117: VecGetArray(y, &yy);
118: for (i = 0; i < m; i++) {
119: x0 = xx[5 * i];
120: x1 = xx[5 * i + 1];
121: x2 = xx[5 * i + 2];
122: x3 = xx[5 * i + 3];
123: x4 = xx[5 * i + 4];
125: yy[5 * i] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
126: yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
127: yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
128: yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
129: yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
130: diag += 25;
131: }
132: VecRestoreArrayRead(x, &xx);
133: VecRestoreArray(y, &yy);
134: PetscLogFlops(45.0 * m);
135: return 0;
136: }
137: static PetscErrorCode PCApply_PBJacobi_6(PC pc, Vec x, Vec y)
138: {
139: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
140: PetscInt i, m = jac->mbs;
141: const MatScalar *diag = jac->diag;
142: PetscScalar x0, x1, x2, x3, x4, x5, *yy;
143: const PetscScalar *xx;
145: VecGetArrayRead(x, &xx);
146: VecGetArray(y, &yy);
147: for (i = 0; i < m; i++) {
148: x0 = xx[6 * i];
149: x1 = xx[6 * i + 1];
150: x2 = xx[6 * i + 2];
151: x3 = xx[6 * i + 3];
152: x4 = xx[6 * i + 4];
153: x5 = xx[6 * i + 5];
155: yy[6 * i] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
156: yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
157: yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
158: yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
159: yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
160: yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
161: diag += 36;
162: }
163: VecRestoreArrayRead(x, &xx);
164: VecRestoreArray(y, &yy);
165: PetscLogFlops(66.0 * m);
166: return 0;
167: }
168: static PetscErrorCode PCApply_PBJacobi_7(PC pc, Vec x, Vec y)
169: {
170: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
171: PetscInt i, m = jac->mbs;
172: const MatScalar *diag = jac->diag;
173: PetscScalar x0, x1, x2, x3, x4, x5, x6, *yy;
174: const PetscScalar *xx;
176: VecGetArrayRead(x, &xx);
177: VecGetArray(y, &yy);
178: for (i = 0; i < m; i++) {
179: x0 = xx[7 * i];
180: x1 = xx[7 * i + 1];
181: x2 = xx[7 * i + 2];
182: x3 = xx[7 * i + 3];
183: x4 = xx[7 * i + 4];
184: x5 = xx[7 * i + 5];
185: x6 = xx[7 * i + 6];
187: yy[7 * i] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
188: yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
189: yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
190: yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
191: yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
192: yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
193: yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
194: diag += 49;
195: }
196: VecRestoreArrayRead(x, &xx);
197: VecRestoreArray(y, &yy);
198: PetscLogFlops(91.0 * m); /* 2*bs2 - bs */
199: return 0;
200: }
201: static PetscErrorCode PCApply_PBJacobi_N(PC pc, Vec x, Vec y)
202: {
203: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
204: PetscInt i, ib, jb;
205: const PetscInt m = jac->mbs;
206: const PetscInt bs = jac->bs;
207: const MatScalar *diag = jac->diag;
208: PetscScalar *yy;
209: const PetscScalar *xx;
211: VecGetArrayRead(x, &xx);
212: VecGetArray(y, &yy);
213: for (i = 0; i < m; i++) {
214: for (ib = 0; ib < bs; ib++) {
215: PetscScalar rowsum = 0;
216: for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb];
217: yy[bs * i + ib] = rowsum;
218: }
219: diag += bs * bs;
220: }
221: VecRestoreArrayRead(x, &xx);
222: VecRestoreArray(y, &yy);
223: PetscLogFlops((2.0 * bs * bs - bs) * m); /* 2*bs2 - bs */
224: return 0;
225: }
227: static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc, Vec x, Vec y)
228: {
229: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
230: PetscInt i, j, k, m = jac->mbs, bs = jac->bs;
231: const MatScalar *diag = jac->diag;
232: const PetscScalar *xx;
233: PetscScalar *yy;
235: VecGetArrayRead(x, &xx);
236: VecGetArray(y, &yy);
237: for (i = 0; i < m; i++) {
238: for (j = 0; j < bs; j++) yy[i * bs + j] = 0.;
239: for (j = 0; j < bs; j++) {
240: for (k = 0; k < bs; k++) yy[i * bs + k] += diag[k * bs + j] * xx[i * bs + j];
241: }
242: diag += bs * bs;
243: }
244: VecRestoreArrayRead(x, &xx);
245: VecRestoreArray(y, &yy);
246: PetscLogFlops(m * bs * (2 * bs - 1));
247: return 0;
248: }
250: static PetscErrorCode PCSetUp_PBJacobi(PC pc)
251: {
252: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
253: Mat A = pc->pmat;
254: MatFactorError err;
255: PetscInt nlocal;
257: MatInvertBlockDiagonal(A, &jac->diag);
258: MatFactorGetError(A, &err);
259: if (err) pc->failedreason = (PCFailedReason)err;
261: MatGetBlockSize(A, &jac->bs);
262: MatGetLocalSize(A, &nlocal, NULL);
263: jac->mbs = nlocal / jac->bs;
264: switch (jac->bs) {
265: case 1:
266: pc->ops->apply = PCApply_PBJacobi_1;
267: break;
268: case 2:
269: pc->ops->apply = PCApply_PBJacobi_2;
270: break;
271: case 3:
272: pc->ops->apply = PCApply_PBJacobi_3;
273: break;
274: case 4:
275: pc->ops->apply = PCApply_PBJacobi_4;
276: break;
277: case 5:
278: pc->ops->apply = PCApply_PBJacobi_5;
279: break;
280: case 6:
281: pc->ops->apply = PCApply_PBJacobi_6;
282: break;
283: case 7:
284: pc->ops->apply = PCApply_PBJacobi_7;
285: break;
286: default:
287: pc->ops->apply = PCApply_PBJacobi_N;
288: break;
289: }
290: pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N;
291: return 0;
292: }
294: static PetscErrorCode PCDestroy_PBJacobi(PC pc)
295: {
296: /*
297: Free the private data structure that was hanging off the PC
298: */
299: PetscFree(pc->data);
300: return 0;
301: }
303: static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer)
304: {
305: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
306: PetscBool iascii;
308: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
309: if (iascii) PetscViewerASCIIPrintf(viewer, " point-block size %" PetscInt_FMT "\n", jac->bs);
310: return 0;
311: }
313: /*MC
314: PCPBJACOBI - Point block Jacobi preconditioner
316: Notes:
317: See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks
319: This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix
321: Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
322: is detected a PETSc error is generated.
324: Developer Notes:
325: This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
326: the factorization to continue even after a zero pivot is found resulting in a Nan and hence
327: terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing
328: a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
330: Perhaps should provide an option that allows generation of a valid preconditioner
331: even if a block is singular as the `PCJACOBI` does.
333: Level: beginner
335: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI`
336: M*/
338: PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
339: {
340: PC_PBJacobi *jac;
342: /*
343: Creates the private data structure for this preconditioner and
344: attach it to the PC object.
345: */
346: PetscNew(&jac);
347: pc->data = (void *)jac;
349: /*
350: Initialize the pointers to vectors to ZERO; these will be used to store
351: diagonal entries of the matrix for fast preconditioner application.
352: */
353: jac->diag = NULL;
355: /*
356: Set the pointers for the functions that are provided above.
357: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
358: are called, they will automatically call these functions. Note we
359: choose not to provide a couple of these functions since they are
360: not needed.
361: */
362: pc->ops->apply = NULL; /*set depending on the block size */
363: pc->ops->applytranspose = NULL;
364: pc->ops->setup = PCSetUp_PBJacobi;
365: pc->ops->destroy = PCDestroy_PBJacobi;
366: pc->ops->setfromoptions = NULL;
367: pc->ops->view = PCView_PBJacobi;
368: pc->ops->applyrichardson = NULL;
369: pc->ops->applysymmetricleft = NULL;
370: pc->ops->applysymmetricright = NULL;
371: return 0;
372: }