Actual source code: pch2opus.c

  1: #include <petsc/private/pcimpl.h>
  2: #include <petsc/private/matimpl.h>
  3: #include <h2opusconf.h>

  5: /* Use GPU only if H2OPUS is configured for GPU */
  6: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
  7:   #define PETSC_H2OPUS_USE_GPU
  8: #endif

 10: typedef struct {
 11:   Mat         A;
 12:   Mat         M;
 13:   PetscScalar s0;

 15:   /* sampler for Newton-Schultz */
 16:   Mat      S;
 17:   PetscInt hyperorder;
 18:   Vec      wns[4];
 19:   Mat      wnsmat[4];

 21:   /* convergence testing */
 22:   Mat T;
 23:   Vec w;

 25:   /* Support for PCSetCoordinates */
 26:   PetscInt   sdim;
 27:   PetscInt   nlocc;
 28:   PetscReal *coords;

 30:   /* Newton-Schultz customization */
 31:   PetscInt  maxits;
 32:   PetscReal rtol, atol;
 33:   PetscBool monitor;
 34:   PetscBool useapproximatenorms;
 35:   NormType  normtype;

 37:   /* Used when pmat != MATH2OPUS */
 38:   PetscReal eta;
 39:   PetscInt  leafsize;
 40:   PetscInt  max_rank;
 41:   PetscInt  bs;
 42:   PetscReal mrtol;

 44:   /* CPU/GPU */
 45:   PetscBool forcecpu;
 46:   PetscBool boundtocpu;
 47: } PC_H2OPUS;

 49: PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *);

 51: static PetscErrorCode PCReset_H2OPUS(PC pc)
 52: {
 53:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

 55:   pch2opus->sdim  = 0;
 56:   pch2opus->nlocc = 0;
 57:   PetscFree(pch2opus->coords);
 58:   MatDestroy(&pch2opus->A);
 59:   MatDestroy(&pch2opus->M);
 60:   MatDestroy(&pch2opus->T);
 61:   VecDestroy(&pch2opus->w);
 62:   MatDestroy(&pch2opus->S);
 63:   VecDestroy(&pch2opus->wns[0]);
 64:   VecDestroy(&pch2opus->wns[1]);
 65:   VecDestroy(&pch2opus->wns[2]);
 66:   VecDestroy(&pch2opus->wns[3]);
 67:   MatDestroy(&pch2opus->wnsmat[0]);
 68:   MatDestroy(&pch2opus->wnsmat[1]);
 69:   MatDestroy(&pch2opus->wnsmat[2]);
 70:   MatDestroy(&pch2opus->wnsmat[3]);
 71:   return 0;
 72: }

 74: static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
 75: {
 76:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
 77:   PetscBool  reset    = PETSC_TRUE;

 79:   if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
 80:     PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset);
 81:     reset = (PetscBool)!reset;
 82:   }
 83:   MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
 84:   if (reset) {
 85:     PCReset_H2OPUS(pc);
 86:     PetscMalloc1(sdim * nlocc, &pch2opus->coords);
 87:     PetscArraycpy(pch2opus->coords, coords, sdim * nlocc);
 88:     pch2opus->sdim  = sdim;
 89:     pch2opus->nlocc = nlocc;
 90:   }
 91:   return 0;
 92: }

 94: static PetscErrorCode PCDestroy_H2OPUS(PC pc)
 95: {
 96:   PCReset_H2OPUS(pc);
 97:   PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL);
 98:   PetscFree(pc->data);
 99:   return 0;
100: }

102: static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems *PetscOptionsObject)
103: {
104:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

106:   PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options");
107:   PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL);
108:   PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL);
109:   PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL);
110:   PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL);
111:   PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL);
112:   PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL);
113:   PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL);
114:   PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL);
115:   PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL);
116:   PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL);
117:   PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL);
118:   PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL);
119:   PetscOptionsHeadEnd();
120:   return 0;
121: }

123: typedef struct {
124:   Mat A;
125:   Mat M;
126:   Vec w;
127: } AAtCtx;

129: static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
130: {
131:   AAtCtx *aat;

133:   MatShellGetContext(A, (void *)&aat);
134:   /* MatMultTranspose(aat->M,x,aat->w); */
135:   MatMultTranspose(aat->A, x, aat->w);
136:   MatMult(aat->A, aat->w, y);
137:   return 0;
138: }

140: static PetscErrorCode PCH2OpusSetUpInit(PC pc)
141: {
142:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
143:   Mat        A        = pc->useAmat ? pc->mat : pc->pmat, AAt;
144:   PetscInt   M, m;
145:   VecType    vtype;
146:   PetscReal  n;
147:   AAtCtx     aat;

149:   aat.A = A;
150:   aat.M = pch2opus->M; /* unused so far */
151:   MatCreateVecs(A, NULL, &aat.w);
152:   MatGetSize(A, &M, NULL);
153:   MatGetLocalSize(A, &m, NULL);
154:   MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt);
155:   MatBindToCPU(AAt, pch2opus->boundtocpu);
156:   MatShellSetOperation(AAt, MATOP_MULT, (void (*)(void))MatMult_AAt);
157:   MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMult_AAt);
158:   MatShellSetOperation(AAt, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS);
159:   MatGetVecType(A, &vtype);
160:   MatShellSetVecType(AAt, vtype);
161:   MatNorm(AAt, NORM_1, &n);
162:   pch2opus->s0 = 1. / n;
163:   VecDestroy(&aat.w);
164:   MatDestroy(&AAt);
165:   return 0;
166: }

168: static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
169: {
170:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

172:   if (t) MatMultTranspose(pch2opus->M, x, y);
173:   else MatMult(pch2opus->M, x, y);
174:   return 0;
175: }

177: static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
178: {
179:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;

181:   if (t) MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y);
182:   else MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y);
183:   return 0;
184: }

186: static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
187: {
188:   PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE);
189:   return 0;
190: }

192: static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
193: {
194:   PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE);
195:   return 0;
196: }

198: static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
199: {
200:   PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE);
201:   return 0;
202: }

204: static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
205: {
206:   PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE);
207:   return 0;
208: }

210: /* used to test the norm of (M^-1 A - I) */
211: static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
212: {
213:   PC         pc;
214:   Mat        A;
215:   PC_H2OPUS *pch2opus;
216:   PetscBool  sideleft = PETSC_TRUE;

218:   MatShellGetContext(M, (void *)&pc);
219:   pch2opus = (PC_H2OPUS *)pc->data;
220:   if (!pch2opus->w) MatCreateVecs(pch2opus->M, &pch2opus->w, NULL);
221:   A = pch2opus->A;
222:   VecBindToCPU(pch2opus->w, pch2opus->boundtocpu);
223:   if (t) {
224:     if (sideleft) {
225:       PCApplyTranspose_H2OPUS(pc, x, pch2opus->w);
226:       MatMultTranspose(A, pch2opus->w, y);
227:     } else {
228:       MatMultTranspose(A, x, pch2opus->w);
229:       PCApplyTranspose_H2OPUS(pc, pch2opus->w, y);
230:     }
231:   } else {
232:     if (sideleft) {
233:       MatMult(A, x, pch2opus->w);
234:       PCApply_H2OPUS(pc, pch2opus->w, y);
235:     } else {
236:       PCApply_H2OPUS(pc, x, pch2opus->w);
237:       MatMult(A, pch2opus->w, y);
238:     }
239:   }
240:   VecAXPY(y, -1.0, x);
241:   return 0;
242: }

244: static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
245: {
246:   MatMultKernel_MAmI(A, x, y, PETSC_FALSE);
247:   return 0;
248: }

250: static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
251: {
252:   MatMultKernel_MAmI(A, x, y, PETSC_TRUE);
253:   return 0;
254: }

256: /* HyperPower kernel:
257: Y = R = x
258: for i = 1 . . . l - 1 do
259:   R = (I - A * Xk) * R
260:   Y = Y + R
261: Y = Xk * Y
262: */
263: static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
264: {
265:   PC         pc;
266:   Mat        A;
267:   PC_H2OPUS *pch2opus;

269:   MatShellGetContext(M, (void *)&pc);
270:   pch2opus = (PC_H2OPUS *)pc->data;
271:   A        = pch2opus->A;
272:   MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
273:   MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3]);
274:   VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu);
275:   VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu);
276:   VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu);
277:   VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu);
278:   VecCopy(x, pch2opus->wns[0]);
279:   VecCopy(x, pch2opus->wns[3]);
280:   if (t) {
281:     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
282:       MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1]);
283:       PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2]);
284:       VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]);
285:       VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]);
286:     }
287:     PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y);
288:   } else {
289:     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
290:       PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]);
291:       MatMult(A, pch2opus->wns[1], pch2opus->wns[2]);
292:       VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]);
293:       VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]);
294:     }
295:     PCApply_H2OPUS(pc, pch2opus->wns[3], y);
296:   }
297:   return 0;
298: }

300: static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
301: {
302:   MatMultKernel_Hyper(M, x, y, PETSC_FALSE);
303:   return 0;
304: }

306: static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
307: {
308:   MatMultKernel_Hyper(M, x, y, PETSC_TRUE);
309:   return 0;
310: }

312: /* Hyper power kernel, MatMat version */
313: static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
314: {
315:   PC         pc;
316:   Mat        A;
317:   PC_H2OPUS *pch2opus;
318:   PetscInt   i;

320:   MatShellGetContext(M, (void *)&pc);
321:   pch2opus = (PC_H2OPUS *)pc->data;
322:   A        = pch2opus->A;
323:   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
324:     MatDestroy(&pch2opus->wnsmat[0]);
325:     MatDestroy(&pch2opus->wnsmat[1]);
326:   }
327:   if (!pch2opus->wnsmat[0]) {
328:     MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]);
329:     MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]);
330:   }
331:   if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
332:     MatDestroy(&pch2opus->wnsmat[2]);
333:     MatDestroy(&pch2opus->wnsmat[3]);
334:   }
335:   if (!pch2opus->wnsmat[2]) {
336:     MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2]);
337:     MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3]);
338:   }
339:   MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN);
340:   MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN);
341:   if (t) {
342:     for (i = 0; i < pch2opus->hyperorder - 1; i++) {
343:       MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1]);
344:       PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2]);
345:       MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN);
346:       MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN);
347:     }
348:     PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y);
349:   } else {
350:     for (i = 0; i < pch2opus->hyperorder - 1; i++) {
351:       PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]);
352:       MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[2]);
353:       MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN);
354:       MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN);
355:     }
356:     PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y);
357:   }
358:   return 0;
359: }

361: static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, void *ctx)
362: {
363:   MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE);
364:   return 0;
365: }

367: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
368: static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
369: {
370:   PC         pc;
371:   Mat        A;
372:   PC_H2OPUS *pch2opus;

374:   MatShellGetContext(M, (void *)&pc);
375:   pch2opus = (PC_H2OPUS *)pc->data;
376:   A        = pch2opus->A;
377:   MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
378:   VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu);
379:   VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu);
380:   if (t) {
381:     PCApplyTranspose_H2OPUS(pc, x, y);
382:     MatMultTranspose(A, y, pch2opus->wns[1]);
383:     PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0]);
384:     VecAXPBY(y, -1., 2., pch2opus->wns[0]);
385:   } else {
386:     PCApply_H2OPUS(pc, x, y);
387:     MatMult(A, y, pch2opus->wns[0]);
388:     PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]);
389:     VecAXPBY(y, -1., 2., pch2opus->wns[1]);
390:   }
391:   return 0;
392: }

394: static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
395: {
396:   MatMultKernel_NS(M, x, y, PETSC_FALSE);
397:   return 0;
398: }

400: static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
401: {
402:   MatMultKernel_NS(M, x, y, PETSC_TRUE);
403:   return 0;
404: }

406: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
407: static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
408: {
409:   PC         pc;
410:   Mat        A;
411:   PC_H2OPUS *pch2opus;

413:   MatShellGetContext(M, (void *)&pc);
414:   pch2opus = (PC_H2OPUS *)pc->data;
415:   A        = pch2opus->A;
416:   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
417:     MatDestroy(&pch2opus->wnsmat[0]);
418:     MatDestroy(&pch2opus->wnsmat[1]);
419:   }
420:   if (!pch2opus->wnsmat[0]) {
421:     MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]);
422:     MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]);
423:   }
424:   if (t) {
425:     PCApplyTransposeMat_H2OPUS(pc, X, Y);
426:     MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1]);
427:     PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0]);
428:     MatScale(Y, 2.);
429:     MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN);
430:   } else {
431:     PCApplyMat_H2OPUS(pc, X, Y);
432:     MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[0]);
433:     PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]);
434:     MatScale(Y, 2.);
435:     MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN);
436:   }
437:   return 0;
438: }

440: static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
441: {
442:   MatMatMultKernel_NS(M, X, Y, PETSC_FALSE);
443:   return 0;
444: }

446: static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
447: {
448:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
449:   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;

451:   if (!pch2opus->S) {
452:     PetscInt M, N, m, n;

454:     MatGetSize(A, &M, &N);
455:     MatGetLocalSize(A, &m, &n);
456:     MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S);
457:     MatSetBlockSizesFromMats(pch2opus->S, A, A);
458: #if defined(PETSC_H2OPUS_USE_GPU)
459:     MatShellSetVecType(pch2opus->S, VECCUDA);
460: #endif
461:   }
462:   if (pch2opus->hyperorder >= 2) {
463:     MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_Hyper);
464:     MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Hyper);
465:     MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE);
466:     MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA);
467:   } else {
468:     MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_NS);
469:     MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_NS);
470:     MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE);
471:     MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA);
472:   }
473:   MatPropagateSymmetryOptions(A, pch2opus->S);
474:   MatBindToCPU(pch2opus->S, pch2opus->boundtocpu);
475:   /* XXX */
476:   MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE);
477:   return 0;
478: }

480: static PetscErrorCode PCSetUp_H2OPUS(PC pc)
481: {
482:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
483:   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;
484:   NormType   norm     = pch2opus->normtype;
485:   PetscReal  initerr  = 0.0, err;
486:   PetscBool  ish2opus;

488:   if (!pch2opus->T) {
489:     PetscInt    M, N, m, n;
490:     const char *prefix;

492:     PCGetOptionsPrefix(pc, &prefix);
493:     MatGetSize(A, &M, &N);
494:     MatGetLocalSize(A, &m, &n);
495:     MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T);
496:     MatSetBlockSizesFromMats(pch2opus->T, A, A);
497:     MatShellSetOperation(pch2opus->T, MATOP_MULT, (void (*)(void))MatMult_MAmI);
498:     MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_MAmI);
499:     MatShellSetOperation(pch2opus->T, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS);
500: #if defined(PETSC_H2OPUS_USE_GPU)
501:     MatShellSetVecType(pch2opus->T, VECCUDA);
502: #endif
503:     MatSetOptionsPrefix(pch2opus->T, prefix);
504:     MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_");
505:   }
506:   MatDestroy(&pch2opus->A);
507:   PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus);
508:   if (ish2opus) {
509:     PetscObjectReference((PetscObject)A);
510:     pch2opus->A = A;
511:   } else {
512:     const char *prefix;
513:     MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A);
514:     /* XXX */
515:     MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE);
516:     PCGetOptionsPrefix(pc, &prefix);
517:     MatSetOptionsPrefix(pch2opus->A, prefix);
518:     MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_");
519:     MatSetFromOptions(pch2opus->A);
520:     MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY);
521:     MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY);
522:     /* XXX */
523:     MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE);

525:     /* always perform construction on the GPU unless forcecpu is true */
526:     MatBindToCPU(pch2opus->A, pch2opus->forcecpu);
527:   }
528: #if defined(PETSC_H2OPUS_USE_GPU)
529:   pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu;
530: #endif
531:   MatBindToCPU(pch2opus->T, pch2opus->boundtocpu);
532:   if (pch2opus->M) { /* see if we can reuse M as initial guess */
533:     PetscReal reuse;

535:     MatBindToCPU(pch2opus->M, pch2opus->boundtocpu);
536:     MatNorm(pch2opus->T, norm, &reuse);
537:     if (reuse >= 1.0) MatDestroy(&pch2opus->M);
538:   }
539:   if (!pch2opus->M) {
540:     const char *prefix;
541:     MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M);
542:     PCGetOptionsPrefix(pc, &prefix);
543:     MatSetOptionsPrefix(pch2opus->M, prefix);
544:     MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_");
545:     MatSetFromOptions(pch2opus->M);
546:     PCH2OpusSetUpInit(pc);
547:     MatScale(pch2opus->M, pch2opus->s0);
548:   }
549:   /* A and M have the same h2 matrix structure, save on reordering routines */
550:   MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE);
551:   MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE);
552:   if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) MatNorm(pch2opus->T, norm, &initerr);
553:   if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
554:   err = initerr;
555:   if (pch2opus->monitor) PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr));
556:   if (initerr > pch2opus->atol && !pc->failedreason) {
557:     PetscInt i;

559:     PCH2OpusSetUpSampler_Private(pc);
560:     for (i = 0; i < pch2opus->maxits; i++) {
561:       Mat         M;
562:       const char *prefix;

564:       MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M);
565:       MatGetOptionsPrefix(pch2opus->M, &prefix);
566:       MatSetOptionsPrefix(M, prefix);
567:       MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE);
568:       MatSetFromOptions(M);
569:       MatH2OpusSetNativeMult(M, PETSC_TRUE);
570:       MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY);
571:       MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY);
572:       MatDestroy(&pch2opus->M);
573:       pch2opus->M = M;
574:       if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) MatNorm(pch2opus->T, norm, &err);
575:       if (pch2opus->monitor) PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr));
576:       if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
577:       if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break;
578:     }
579:   }
580:   /* cleanup setup workspace */
581:   MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE);
582:   MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE);
583:   VecDestroy(&pch2opus->wns[0]);
584:   VecDestroy(&pch2opus->wns[1]);
585:   VecDestroy(&pch2opus->wns[2]);
586:   VecDestroy(&pch2opus->wns[3]);
587:   MatDestroy(&pch2opus->wnsmat[0]);
588:   MatDestroy(&pch2opus->wnsmat[1]);
589:   MatDestroy(&pch2opus->wnsmat[2]);
590:   MatDestroy(&pch2opus->wnsmat[3]);
591:   return 0;
592: }

594: static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
595: {
596:   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
597:   PetscBool  isascii;

599:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
600:   if (isascii) {
601:     if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
602:       PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n");
603:       PetscViewerASCIIPushTab(viewer);
604:       PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL);
605:       MatView(pch2opus->A, viewer);
606:       PetscViewerPopFormat(viewer);
607:       PetscViewerASCIIPopTab(viewer);
608:     }
609:     if (pch2opus->M) {
610:       PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n");
611:       PetscViewerASCIIPushTab(viewer);
612:       PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL);
613:       MatView(pch2opus->M, viewer);
614:       PetscViewerPopFormat(viewer);
615:       PetscViewerASCIIPopTab(viewer);
616:     }
617:     PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0);
618:   }
619:   return 0;
620: }

622: /*MC
623:      PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package.

625:    Options Database Keys:
626: +   -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()`
627: .   -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz
628: .   -pc_h2opus_monitor - monitor Newton-Schultz convergence
629: .   -pc_h2opus_atol - absolute tolerance
630: .   -pc_h2opus_rtol - relative tolerance
631: .   -pc_h2opus_norm_type - normtype
632: .   -pc_h2opus_hyperorder - Hyper power order of sampling
633: .   -pc_h2opus_leafsize - leaf size when constructed from kernel
634: .   -pc_h2opus_eta - admissibility condition tolerance
635: .   -pc_h2opus_maxrank - maximum rank when constructed from matvecs
636: .   -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs
637: .   -pc_h2opus_mrtol - relative tolerance for construction from sampling
638: -   -pc_h2opus_forcecpu - force construction of preconditioner on CPU

640:    Level: intermediate

642: .seealso: `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()`
643: M*/

645: PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
646: {
647:   PC_H2OPUS *pch2opus;

649:   PetscNew(&pch2opus);
650:   pc->data = (void *)pch2opus;

652:   pch2opus->atol       = 1.e-2;
653:   pch2opus->rtol       = 1.e-6;
654:   pch2opus->maxits     = 50;
655:   pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */
656:   pch2opus->normtype   = NORM_2;

658:   /* these are needed when we are sampling the pmat */
659:   pch2opus->eta      = PETSC_DECIDE;
660:   pch2opus->leafsize = PETSC_DECIDE;
661:   pch2opus->max_rank = PETSC_DECIDE;
662:   pch2opus->bs       = PETSC_DECIDE;
663:   pch2opus->mrtol    = PETSC_DECIDE;
664: #if defined(PETSC_H2OPUS_USE_GPU)
665:   pch2opus->boundtocpu = PETSC_FALSE;
666: #else
667:   pch2opus->boundtocpu = PETSC_TRUE;
668: #endif
669:   pc->ops->destroy        = PCDestroy_H2OPUS;
670:   pc->ops->setup          = PCSetUp_H2OPUS;
671:   pc->ops->apply          = PCApply_H2OPUS;
672:   pc->ops->matapply       = PCApplyMat_H2OPUS;
673:   pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
674:   pc->ops->reset          = PCReset_H2OPUS;
675:   pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
676:   pc->ops->view           = PCView_H2OPUS;

678:   PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS);
679:   return 0;
680: }