Actual source code: vpbjacobi.c

  1: #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h>

  3: static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y)
  4: {
  5:   PC_VPBJacobi      *jac = (PC_VPBJacobi *)pc->data;
  6:   PetscInt           i, ncnt = 0;
  7:   const MatScalar   *diag = jac->diag;
  8:   PetscInt           ib, jb, bs;
  9:   const PetscScalar *xx;
 10:   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
 11:   PetscInt           nblocks;
 12:   const PetscInt    *bsizes;

 14:   MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes);
 15:   VecGetArrayRead(x, &xx);
 16:   VecGetArray(y, &yy);
 17:   for (i = 0; i < nblocks; i++) {
 18:     bs = bsizes[i];
 19:     switch (bs) {
 20:     case 1:
 21:       yy[ncnt] = *diag * xx[ncnt];
 22:       break;
 23:     case 2:
 24:       x0           = xx[ncnt];
 25:       x1           = xx[ncnt + 1];
 26:       yy[ncnt]     = diag[0] * x0 + diag[2] * x1;
 27:       yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1;
 28:       break;
 29:     case 3:
 30:       x0           = xx[ncnt];
 31:       x1           = xx[ncnt + 1];
 32:       x2           = xx[ncnt + 2];
 33:       yy[ncnt]     = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
 34:       yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
 35:       yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
 36:       break;
 37:     case 4:
 38:       x0           = xx[ncnt];
 39:       x1           = xx[ncnt + 1];
 40:       x2           = xx[ncnt + 2];
 41:       x3           = xx[ncnt + 3];
 42:       yy[ncnt]     = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
 43:       yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
 44:       yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
 45:       yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
 46:       break;
 47:     case 5:
 48:       x0           = xx[ncnt];
 49:       x1           = xx[ncnt + 1];
 50:       x2           = xx[ncnt + 2];
 51:       x3           = xx[ncnt + 3];
 52:       x4           = xx[ncnt + 4];
 53:       yy[ncnt]     = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
 54:       yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
 55:       yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
 56:       yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
 57:       yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
 58:       break;
 59:     case 6:
 60:       x0           = xx[ncnt];
 61:       x1           = xx[ncnt + 1];
 62:       x2           = xx[ncnt + 2];
 63:       x3           = xx[ncnt + 3];
 64:       x4           = xx[ncnt + 4];
 65:       x5           = xx[ncnt + 5];
 66:       yy[ncnt]     = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
 67:       yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
 68:       yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
 69:       yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
 70:       yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
 71:       yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
 72:       break;
 73:     case 7:
 74:       x0           = xx[ncnt];
 75:       x1           = xx[ncnt + 1];
 76:       x2           = xx[ncnt + 2];
 77:       x3           = xx[ncnt + 3];
 78:       x4           = xx[ncnt + 4];
 79:       x5           = xx[ncnt + 5];
 80:       x6           = xx[ncnt + 6];
 81:       yy[ncnt]     = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
 82:       yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
 83:       yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
 84:       yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
 85:       yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
 86:       yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
 87:       yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
 88:       break;
 89:     default:
 90:       for (ib = 0; ib < bs; ib++) {
 91:         PetscScalar rowsum = 0;
 92:         for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[ncnt + jb];
 93:         yy[ncnt + ib] = rowsum;
 94:       }
 95:     }
 96:     ncnt += bsizes[i];
 97:     diag += bsizes[i] * bsizes[i];
 98:   }
 99:   VecRestoreArrayRead(x, &xx);
100:   VecRestoreArray(y, &yy);
101:   return 0;
102: }

104: PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc)
105: {
106:   PC_VPBJacobi   *jac = (PC_VPBJacobi *)pc->data;
107:   Mat             A   = pc->pmat;
108:   MatFactorError  err;
109:   PetscInt        i, nsize = 0, nlocal;
110:   PetscInt        nblocks;
111:   const PetscInt *bsizes;

113:   MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes);
114:   MatGetLocalSize(pc->pmat, &nlocal, NULL);
116:   if (!jac->diag) {
117:     for (i = 0; i < nblocks; i++) nsize += bsizes[i] * bsizes[i];
118:     PetscMalloc1(nsize, &jac->diag);
119:   }
120:   MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag);
121:   MatFactorGetError(A, &err);
122:   if (err) pc->failedreason = (PCFailedReason)err;
123:   pc->ops->apply = PCApply_VPBJacobi;
124:   return 0;
125: }

127: static PetscErrorCode PCSetUp_VPBJacobi(PC pc)
128: {
129:   /* In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
130: #if defined(PETSC_HAVE_CUDA)
131:   PetscBool isCuda;
132:   PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "");
133: #endif
134: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
135:   PetscBool isKok;
136:   PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "");
137: #endif

139: #if defined(PETSC_HAVE_CUDA)
140:   if (isCuda) PCSetUp_VPBJacobi_CUDA(pc);
141:   else
142: #endif
143: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
144:     if (isKok)
145:     PCSetUp_VPBJacobi_Kokkos(pc);
146:   else
147: #endif
148:   {
149:     PCSetUp_VPBJacobi_Host(pc);
150:   }
151:   return 0;
152: }

154: PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc)
155: {
156:   PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;

158:   /*
159:       Free the private data structure that was hanging off the PC
160:   */
161:   PetscFree(jac->diag);
162:   PetscFree(pc->data);
163:   return 0;
164: }

166: /*MC
167:      PCVPBJACOBI - Variable size point block Jacobi preconditioner

169:    Level: beginner

171:    Notes:
172:      See `PCJACOBI` for point Jacobi preconditioning, `PCPBJACOBI` for fixed point block size, and `PCBJACOBI` for large size blocks

174:      This works for `MATAIJ` matrices

176:      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
177:      is detected a PETSc error is generated.

179:      One must call `MatSetVariableBlockSizes()` to use this preconditioner

181:    Developer Notes:
182:      This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
183:      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
184:      terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing
185:      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.

187:      Perhaps should provide an option that allows generation of a valid preconditioner
188:      even if a block is singular as the `PCJACOBI` does.

190: .seealso: `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI`
191: M*/

193: PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc)
194: {
195:   PC_VPBJacobi *jac;

197:   /*
198:      Creates the private data structure for this preconditioner and
199:      attach it to the PC object.
200:   */
201:   PetscNew(&jac);
202:   pc->data = (void *)jac;

204:   /*
205:      Initialize the pointers to vectors to ZERO; these will be used to store
206:      diagonal entries of the matrix for fast preconditioner application.
207:   */
208:   jac->diag = NULL;

210:   /*
211:       Set the pointers for the functions that are provided above.
212:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
213:       are called, they will automatically call these functions.  Note we
214:       choose not to provide a couple of these functions since they are
215:       not needed.
216:   */
217:   pc->ops->apply               = PCApply_VPBJacobi;
218:   pc->ops->applytranspose      = NULL;
219:   pc->ops->setup               = PCSetUp_VPBJacobi;
220:   pc->ops->destroy             = PCDestroy_VPBJacobi;
221:   pc->ops->setfromoptions      = NULL;
222:   pc->ops->applyrichardson     = NULL;
223:   pc->ops->applysymmetricleft  = NULL;
224:   pc->ops->applysymmetricright = NULL;
225:   return 0;
226: }