Actual source code: vpbjacobi.c
1: #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h>
3: static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y)
4: {
5: PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
6: PetscInt i, ncnt = 0;
7: const MatScalar *diag = jac->diag;
8: PetscInt ib, jb, bs;
9: const PetscScalar *xx;
10: PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6;
11: PetscInt nblocks;
12: const PetscInt *bsizes;
14: MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes);
15: VecGetArrayRead(x, &xx);
16: VecGetArray(y, &yy);
17: for (i = 0; i < nblocks; i++) {
18: bs = bsizes[i];
19: switch (bs) {
20: case 1:
21: yy[ncnt] = *diag * xx[ncnt];
22: break;
23: case 2:
24: x0 = xx[ncnt];
25: x1 = xx[ncnt + 1];
26: yy[ncnt] = diag[0] * x0 + diag[2] * x1;
27: yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1;
28: break;
29: case 3:
30: x0 = xx[ncnt];
31: x1 = xx[ncnt + 1];
32: x2 = xx[ncnt + 2];
33: yy[ncnt] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
34: yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
35: yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
36: break;
37: case 4:
38: x0 = xx[ncnt];
39: x1 = xx[ncnt + 1];
40: x2 = xx[ncnt + 2];
41: x3 = xx[ncnt + 3];
42: yy[ncnt] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
43: yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
44: yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
45: yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
46: break;
47: case 5:
48: x0 = xx[ncnt];
49: x1 = xx[ncnt + 1];
50: x2 = xx[ncnt + 2];
51: x3 = xx[ncnt + 3];
52: x4 = xx[ncnt + 4];
53: yy[ncnt] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
54: yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
55: yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
56: yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
57: yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
58: break;
59: case 6:
60: x0 = xx[ncnt];
61: x1 = xx[ncnt + 1];
62: x2 = xx[ncnt + 2];
63: x3 = xx[ncnt + 3];
64: x4 = xx[ncnt + 4];
65: x5 = xx[ncnt + 5];
66: yy[ncnt] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
67: yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
68: yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
69: yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
70: yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
71: yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
72: break;
73: case 7:
74: x0 = xx[ncnt];
75: x1 = xx[ncnt + 1];
76: x2 = xx[ncnt + 2];
77: x3 = xx[ncnt + 3];
78: x4 = xx[ncnt + 4];
79: x5 = xx[ncnt + 5];
80: x6 = xx[ncnt + 6];
81: yy[ncnt] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
82: yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
83: yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
84: yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
85: yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
86: yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
87: yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
88: break;
89: default:
90: for (ib = 0; ib < bs; ib++) {
91: PetscScalar rowsum = 0;
92: for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[ncnt + jb];
93: yy[ncnt + ib] = rowsum;
94: }
95: }
96: ncnt += bsizes[i];
97: diag += bsizes[i] * bsizes[i];
98: }
99: VecRestoreArrayRead(x, &xx);
100: VecRestoreArray(y, &yy);
101: return 0;
102: }
104: PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc)
105: {
106: PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
107: Mat A = pc->pmat;
108: MatFactorError err;
109: PetscInt i, nsize = 0, nlocal;
110: PetscInt nblocks;
111: const PetscInt *bsizes;
113: MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes);
114: MatGetLocalSize(pc->pmat, &nlocal, NULL);
116: if (!jac->diag) {
117: for (i = 0; i < nblocks; i++) nsize += bsizes[i] * bsizes[i];
118: PetscMalloc1(nsize, &jac->diag);
119: }
120: MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag);
121: MatFactorGetError(A, &err);
122: if (err) pc->failedreason = (PCFailedReason)err;
123: pc->ops->apply = PCApply_VPBJacobi;
124: return 0;
125: }
127: static PetscErrorCode PCSetUp_VPBJacobi(PC pc)
128: {
129: /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
130: #if defined(PETSC_HAVE_CUDA)
131: PetscBool isCuda;
132: PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "");
133: #endif
134: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
135: PetscBool isKok;
136: PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "");
137: #endif
139: #if defined(PETSC_HAVE_CUDA)
140: if (isCuda) PCSetUp_VPBJacobi_CUDA(pc);
141: else
142: #endif
143: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
144: if (isKok)
145: PCSetUp_VPBJacobi_Kokkos(pc);
146: else
147: #endif
148: {
149: PCSetUp_VPBJacobi_Host(pc);
150: }
151: return 0;
152: }
154: PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc)
155: {
156: PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
158: /*
159: Free the private data structure that was hanging off the PC
160: */
161: PetscFree(jac->diag);
162: PetscFree(pc->data);
163: return 0;
164: }
166: /*MC
167: PCVPBJACOBI - Variable size point block Jacobi preconditioner
169: Level: beginner
171: Notes:
172: See `PCJACOBI` for point Jacobi preconditioning, `PCPBJACOBI` for fixed point block size, and `PCBJACOBI` for large size blocks
174: This works for `MATAIJ` matrices
176: Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
177: is detected a PETSc error is generated.
179: One must call `MatSetVariableBlockSizes()` to use this preconditioner
181: Developer Notes:
182: This should support the `PCSetErrorIfFailure()` flag set to `PETSC_FALSE` to allow
183: the factorization to continue even after a zero pivot is found, resulting in a NaN and hence
184: terminating `KSP` with a `KSP_DIVERGED_NANORINF`, allowing
185: a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
187: Perhaps should provide an option that allows generation of a valid preconditioner
188: even if a block is singular as the `PCJACOBI` does.
190: .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI`
191: M*/
193: PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc)
194: {
195: PC_VPBJacobi *jac;
197: /*
198: Creates the private data structure for this preconditioner and
199: attach it to the PC object.
200: */
201: PetscNew(&jac);
202: pc->data = (void *)jac;
204: /*
205: Initialize the pointers to vectors to ZERO; these will be used to store
206: diagonal entries of the matrix for fast preconditioner application.
207: */
208: jac->diag = NULL;
210: /*
211: Set the pointers for the functions that are provided above.
212: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
213: are called, they will automatically call these functions. Note we
214: choose not to provide a couple of these functions since they are
215: not needed.
216: */
217: pc->ops->apply = PCApply_VPBJacobi;
218: pc->ops->applytranspose = NULL;
219: pc->ops->setup = PCSetUp_VPBJacobi;
220: pc->ops->destroy = PCDestroy_VPBJacobi;
221: pc->ops->setfromoptions = NULL;
222: pc->ops->applyrichardson = NULL;
223: pc->ops->applysymmetricleft = NULL;
224: pc->ops->applysymmetricright = NULL;
225: return 0;
226: }