Actual source code: pch2opus.c
1: #include <petsc/private/pcimpl.h>
2: #include <petsc/private/matimpl.h>
3: #include <h2opusconf.h>
5: /* Use GPU only if H2OPUS is configured for GPU */
6: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
7: #define PETSC_H2OPUS_USE_GPU
8: #endif
10: typedef struct {
11: Mat A;
12: Mat M;
13: PetscScalar s0;
15: /* sampler for Newton-Schultz */
16: Mat S;
17: PetscInt hyperorder;
18: Vec wns[4];
19: Mat wnsmat[4];
21: /* convergence testing */
22: Mat T;
23: Vec w;
25: /* Support for PCSetCoordinates */
26: PetscInt sdim;
27: PetscInt nlocc;
28: PetscReal *coords;
30: /* Newton-Schultz customization */
31: PetscInt maxits;
32: PetscReal rtol, atol;
33: PetscBool monitor;
34: PetscBool useapproximatenorms;
35: NormType normtype;
37: /* Used when pmat != MATH2OPUS */
38: PetscReal eta;
39: PetscInt leafsize;
40: PetscInt max_rank;
41: PetscInt bs;
42: PetscReal mrtol;
44: /* CPU/GPU */
45: PetscBool forcecpu;
46: PetscBool boundtocpu;
47: } PC_H2OPUS;
49: PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *);
51: static PetscErrorCode PCReset_H2OPUS(PC pc)
52: {
53: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
55: pch2opus->sdim = 0;
56: pch2opus->nlocc = 0;
57: PetscFree(pch2opus->coords);
58: MatDestroy(&pch2opus->A);
59: MatDestroy(&pch2opus->M);
60: MatDestroy(&pch2opus->T);
61: VecDestroy(&pch2opus->w);
62: MatDestroy(&pch2opus->S);
63: VecDestroy(&pch2opus->wns[0]);
64: VecDestroy(&pch2opus->wns[1]);
65: VecDestroy(&pch2opus->wns[2]);
66: VecDestroy(&pch2opus->wns[3]);
67: MatDestroy(&pch2opus->wnsmat[0]);
68: MatDestroy(&pch2opus->wnsmat[1]);
69: MatDestroy(&pch2opus->wnsmat[2]);
70: MatDestroy(&pch2opus->wnsmat[3]);
71: return 0;
72: }
74: static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
75: {
76: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
77: PetscBool reset = PETSC_TRUE;
79: if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
80: PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset);
81: reset = (PetscBool)!reset;
82: }
83: MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc));
84: if (reset) {
85: PCReset_H2OPUS(pc);
86: PetscMalloc1(sdim * nlocc, &pch2opus->coords);
87: PetscArraycpy(pch2opus->coords, coords, sdim * nlocc);
88: pch2opus->sdim = sdim;
89: pch2opus->nlocc = nlocc;
90: }
91: return 0;
92: }
94: static PetscErrorCode PCDestroy_H2OPUS(PC pc)
95: {
96: PCReset_H2OPUS(pc);
97: PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL);
98: PetscFree(pc->data);
99: return 0;
100: }
102: static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems *PetscOptionsObject)
103: {
104: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
106: PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options");
107: PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL);
108: PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL);
109: PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL);
110: PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL);
111: PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL);
112: PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL);
113: PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL);
114: PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL);
115: PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL);
116: PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL);
117: PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL);
118: PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL);
119: PetscOptionsHeadEnd();
120: return 0;
121: }
123: typedef struct {
124: Mat A;
125: Mat M;
126: Vec w;
127: } AAtCtx;
129: static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
130: {
131: AAtCtx *aat;
133: MatShellGetContext(A, (void *)&aat);
134: /* MatMultTranspose(aat->M,x,aat->w); */
135: MatMultTranspose(aat->A, x, aat->w);
136: MatMult(aat->A, aat->w, y);
137: return 0;
138: }
140: static PetscErrorCode PCH2OpusSetUpInit(PC pc)
141: {
142: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
143: Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt;
144: PetscInt M, m;
145: VecType vtype;
146: PetscReal n;
147: AAtCtx aat;
149: aat.A = A;
150: aat.M = pch2opus->M; /* unused so far */
151: MatCreateVecs(A, NULL, &aat.w);
152: MatGetSize(A, &M, NULL);
153: MatGetLocalSize(A, &m, NULL);
154: MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt);
155: MatBindToCPU(AAt, pch2opus->boundtocpu);
156: MatShellSetOperation(AAt, MATOP_MULT, (void (*)(void))MatMult_AAt);
157: MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMult_AAt);
158: MatShellSetOperation(AAt, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS);
159: MatGetVecType(A, &vtype);
160: MatShellSetVecType(AAt, vtype);
161: MatNorm(AAt, NORM_1, &n);
162: pch2opus->s0 = 1. / n;
163: VecDestroy(&aat.w);
164: MatDestroy(&AAt);
165: return 0;
166: }
168: static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
169: {
170: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
172: if (t) MatMultTranspose(pch2opus->M, x, y);
173: else MatMult(pch2opus->M, x, y);
174: return 0;
175: }
177: static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
178: {
179: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
181: if (t) MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y);
182: else MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y);
183: return 0;
184: }
186: static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
187: {
188: PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE);
189: return 0;
190: }
192: static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
193: {
194: PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE);
195: return 0;
196: }
198: static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
199: {
200: PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE);
201: return 0;
202: }
204: static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
205: {
206: PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE);
207: return 0;
208: }
210: /* used to test the norm of (M^-1 A - I) */
211: static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
212: {
213: PC pc;
214: Mat A;
215: PC_H2OPUS *pch2opus;
216: PetscBool sideleft = PETSC_TRUE;
218: MatShellGetContext(M, (void *)&pc);
219: pch2opus = (PC_H2OPUS *)pc->data;
220: if (!pch2opus->w) MatCreateVecs(pch2opus->M, &pch2opus->w, NULL);
221: A = pch2opus->A;
222: VecBindToCPU(pch2opus->w, pch2opus->boundtocpu);
223: if (t) {
224: if (sideleft) {
225: PCApplyTranspose_H2OPUS(pc, x, pch2opus->w);
226: MatMultTranspose(A, pch2opus->w, y);
227: } else {
228: MatMultTranspose(A, x, pch2opus->w);
229: PCApplyTranspose_H2OPUS(pc, pch2opus->w, y);
230: }
231: } else {
232: if (sideleft) {
233: MatMult(A, x, pch2opus->w);
234: PCApply_H2OPUS(pc, pch2opus->w, y);
235: } else {
236: PCApply_H2OPUS(pc, x, pch2opus->w);
237: MatMult(A, pch2opus->w, y);
238: }
239: }
240: VecAXPY(y, -1.0, x);
241: return 0;
242: }
244: static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
245: {
246: MatMultKernel_MAmI(A, x, y, PETSC_FALSE);
247: return 0;
248: }
250: static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
251: {
252: MatMultKernel_MAmI(A, x, y, PETSC_TRUE);
253: return 0;
254: }
256: /* HyperPower kernel:
257: Y = R = x
258: for i = 1 . . . l - 1 do
259: R = (I - A * Xk) * R
260: Y = Y + R
261: Y = Xk * Y
262: */
263: static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
264: {
265: PC pc;
266: Mat A;
267: PC_H2OPUS *pch2opus;
269: MatShellGetContext(M, (void *)&pc);
270: pch2opus = (PC_H2OPUS *)pc->data;
271: A = pch2opus->A;
272: MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
273: MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3]);
274: VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu);
275: VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu);
276: VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu);
277: VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu);
278: VecCopy(x, pch2opus->wns[0]);
279: VecCopy(x, pch2opus->wns[3]);
280: if (t) {
281: for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
282: MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1]);
283: PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2]);
284: VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]);
285: VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]);
286: }
287: PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y);
288: } else {
289: for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
290: PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]);
291: MatMult(A, pch2opus->wns[1], pch2opus->wns[2]);
292: VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]);
293: VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]);
294: }
295: PCApply_H2OPUS(pc, pch2opus->wns[3], y);
296: }
297: return 0;
298: }
300: static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
301: {
302: MatMultKernel_Hyper(M, x, y, PETSC_FALSE);
303: return 0;
304: }
306: static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
307: {
308: MatMultKernel_Hyper(M, x, y, PETSC_TRUE);
309: return 0;
310: }
312: /* Hyper power kernel, MatMat version */
313: static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
314: {
315: PC pc;
316: Mat A;
317: PC_H2OPUS *pch2opus;
318: PetscInt i;
320: MatShellGetContext(M, (void *)&pc);
321: pch2opus = (PC_H2OPUS *)pc->data;
322: A = pch2opus->A;
323: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
324: MatDestroy(&pch2opus->wnsmat[0]);
325: MatDestroy(&pch2opus->wnsmat[1]);
326: }
327: if (!pch2opus->wnsmat[0]) {
328: MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]);
329: MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]);
330: }
331: if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
332: MatDestroy(&pch2opus->wnsmat[2]);
333: MatDestroy(&pch2opus->wnsmat[3]);
334: }
335: if (!pch2opus->wnsmat[2]) {
336: MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2]);
337: MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3]);
338: }
339: MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN);
340: MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN);
341: if (t) {
342: for (i = 0; i < pch2opus->hyperorder - 1; i++) {
343: MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1]);
344: PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2]);
345: MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN);
346: MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN);
347: }
348: PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y);
349: } else {
350: for (i = 0; i < pch2opus->hyperorder - 1; i++) {
351: PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]);
352: MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[2]);
353: MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN);
354: MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN);
355: }
356: PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y);
357: }
358: return 0;
359: }
361: static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, void *ctx)
362: {
363: MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE);
364: return 0;
365: }
367: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
368: static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
369: {
370: PC pc;
371: Mat A;
372: PC_H2OPUS *pch2opus;
374: MatShellGetContext(M, (void *)&pc);
375: pch2opus = (PC_H2OPUS *)pc->data;
376: A = pch2opus->A;
377: MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
378: VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu);
379: VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu);
380: if (t) {
381: PCApplyTranspose_H2OPUS(pc, x, y);
382: MatMultTranspose(A, y, pch2opus->wns[1]);
383: PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0]);
384: VecAXPBY(y, -1., 2., pch2opus->wns[0]);
385: } else {
386: PCApply_H2OPUS(pc, x, y);
387: MatMult(A, y, pch2opus->wns[0]);
388: PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]);
389: VecAXPBY(y, -1., 2., pch2opus->wns[1]);
390: }
391: return 0;
392: }
394: static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
395: {
396: MatMultKernel_NS(M, x, y, PETSC_FALSE);
397: return 0;
398: }
400: static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
401: {
402: MatMultKernel_NS(M, x, y, PETSC_TRUE);
403: return 0;
404: }
406: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
407: static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
408: {
409: PC pc;
410: Mat A;
411: PC_H2OPUS *pch2opus;
413: MatShellGetContext(M, (void *)&pc);
414: pch2opus = (PC_H2OPUS *)pc->data;
415: A = pch2opus->A;
416: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
417: MatDestroy(&pch2opus->wnsmat[0]);
418: MatDestroy(&pch2opus->wnsmat[1]);
419: }
420: if (!pch2opus->wnsmat[0]) {
421: MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]);
422: MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]);
423: }
424: if (t) {
425: PCApplyTransposeMat_H2OPUS(pc, X, Y);
426: MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[1]);
427: PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0]);
428: MatScale(Y, 2.);
429: MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN);
430: } else {
431: PCApplyMat_H2OPUS(pc, X, Y);
432: MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_DEFAULT, &pch2opus->wnsmat[0]);
433: PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]);
434: MatScale(Y, 2.);
435: MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN);
436: }
437: return 0;
438: }
440: static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
441: {
442: MatMatMultKernel_NS(M, X, Y, PETSC_FALSE);
443: return 0;
444: }
446: static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
447: {
448: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
449: Mat A = pc->useAmat ? pc->mat : pc->pmat;
451: if (!pch2opus->S) {
452: PetscInt M, N, m, n;
454: MatGetSize(A, &M, &N);
455: MatGetLocalSize(A, &m, &n);
456: MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S);
457: MatSetBlockSizesFromMats(pch2opus->S, A, A);
458: #if defined(PETSC_H2OPUS_USE_GPU)
459: MatShellSetVecType(pch2opus->S, VECCUDA);
460: #endif
461: }
462: if (pch2opus->hyperorder >= 2) {
463: MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_Hyper);
464: MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Hyper);
465: MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE);
466: MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA);
467: } else {
468: MatShellSetOperation(pch2opus->S, MATOP_MULT, (void (*)(void))MatMult_NS);
469: MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_NS);
470: MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE);
471: MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA);
472: }
473: MatPropagateSymmetryOptions(A, pch2opus->S);
474: MatBindToCPU(pch2opus->S, pch2opus->boundtocpu);
475: /* XXX */
476: MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE);
477: return 0;
478: }
480: static PetscErrorCode PCSetUp_H2OPUS(PC pc)
481: {
482: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
483: Mat A = pc->useAmat ? pc->mat : pc->pmat;
484: NormType norm = pch2opus->normtype;
485: PetscReal initerr = 0.0, err;
486: PetscBool ish2opus;
488: if (!pch2opus->T) {
489: PetscInt M, N, m, n;
490: const char *prefix;
492: PCGetOptionsPrefix(pc, &prefix);
493: MatGetSize(A, &M, &N);
494: MatGetLocalSize(A, &m, &n);
495: MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T);
496: MatSetBlockSizesFromMats(pch2opus->T, A, A);
497: MatShellSetOperation(pch2opus->T, MATOP_MULT, (void (*)(void))MatMult_MAmI);
498: MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_MAmI);
499: MatShellSetOperation(pch2opus->T, MATOP_NORM, (void (*)(void))MatNorm_H2OPUS);
500: #if defined(PETSC_H2OPUS_USE_GPU)
501: MatShellSetVecType(pch2opus->T, VECCUDA);
502: #endif
503: MatSetOptionsPrefix(pch2opus->T, prefix);
504: MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_");
505: }
506: MatDestroy(&pch2opus->A);
507: PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus);
508: if (ish2opus) {
509: PetscObjectReference((PetscObject)A);
510: pch2opus->A = A;
511: } else {
512: const char *prefix;
513: MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A);
514: /* XXX */
515: MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE);
516: PCGetOptionsPrefix(pc, &prefix);
517: MatSetOptionsPrefix(pch2opus->A, prefix);
518: MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_");
519: MatSetFromOptions(pch2opus->A);
520: MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY);
521: MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY);
522: /* XXX */
523: MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE);
525: /* always perform construction on the GPU unless forcecpu is true */
526: MatBindToCPU(pch2opus->A, pch2opus->forcecpu);
527: }
528: #if defined(PETSC_H2OPUS_USE_GPU)
529: pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu;
530: #endif
531: MatBindToCPU(pch2opus->T, pch2opus->boundtocpu);
532: if (pch2opus->M) { /* see if we can reuse M as initial guess */
533: PetscReal reuse;
535: MatBindToCPU(pch2opus->M, pch2opus->boundtocpu);
536: MatNorm(pch2opus->T, norm, &reuse);
537: if (reuse >= 1.0) MatDestroy(&pch2opus->M);
538: }
539: if (!pch2opus->M) {
540: const char *prefix;
541: MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M);
542: PCGetOptionsPrefix(pc, &prefix);
543: MatSetOptionsPrefix(pch2opus->M, prefix);
544: MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_");
545: MatSetFromOptions(pch2opus->M);
546: PCH2OpusSetUpInit(pc);
547: MatScale(pch2opus->M, pch2opus->s0);
548: }
549: /* A and M have the same h2 matrix structure, save on reordering routines */
550: MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE);
551: MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE);
552: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) MatNorm(pch2opus->T, norm, &initerr);
553: if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
554: err = initerr;
555: if (pch2opus->monitor) PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr));
556: if (initerr > pch2opus->atol && !pc->failedreason) {
557: PetscInt i;
559: PCH2OpusSetUpSampler_Private(pc);
560: for (i = 0; i < pch2opus->maxits; i++) {
561: Mat M;
562: const char *prefix;
564: MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M);
565: MatGetOptionsPrefix(pch2opus->M, &prefix);
566: MatSetOptionsPrefix(M, prefix);
567: MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE);
568: MatSetFromOptions(M);
569: MatH2OpusSetNativeMult(M, PETSC_TRUE);
570: MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY);
571: MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY);
572: MatDestroy(&pch2opus->M);
573: pch2opus->M = M;
574: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) MatNorm(pch2opus->T, norm, &err);
575: if (pch2opus->monitor) PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr));
576: if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
577: if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break;
578: }
579: }
580: /* cleanup setup workspace */
581: MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE);
582: MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE);
583: VecDestroy(&pch2opus->wns[0]);
584: VecDestroy(&pch2opus->wns[1]);
585: VecDestroy(&pch2opus->wns[2]);
586: VecDestroy(&pch2opus->wns[3]);
587: MatDestroy(&pch2opus->wnsmat[0]);
588: MatDestroy(&pch2opus->wnsmat[1]);
589: MatDestroy(&pch2opus->wnsmat[2]);
590: MatDestroy(&pch2opus->wnsmat[3]);
591: return 0;
592: }
594: static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
595: {
596: PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
597: PetscBool isascii;
599: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
600: if (isascii) {
601: if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
602: PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n");
603: PetscViewerASCIIPushTab(viewer);
604: PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL);
605: MatView(pch2opus->A, viewer);
606: PetscViewerPopFormat(viewer);
607: PetscViewerASCIIPopTab(viewer);
608: }
609: if (pch2opus->M) {
610: PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n");
611: PetscViewerASCIIPushTab(viewer);
612: PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL);
613: MatView(pch2opus->M, viewer);
614: PetscViewerPopFormat(viewer);
615: PetscViewerASCIIPopTab(viewer);
616: }
617: PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0);
618: }
619: return 0;
620: }
622: /*MC
623: PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package.
625: Options Database Keys:
626: + -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()`
627: . -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz
628: . -pc_h2opus_monitor - monitor Newton-Schultz convergence
629: . -pc_h2opus_atol - absolute tolerance
630: . -pc_h2opus_rtol - relative tolerance
631: . -pc_h2opus_norm_type - normtype
632: . -pc_h2opus_hyperorder - Hyper power order of sampling
633: . -pc_h2opus_leafsize - leaf size when constructed from kernel
634: . -pc_h2opus_eta - admissibility condition tolerance
635: . -pc_h2opus_maxrank - maximum rank when constructed from matvecs
636: . -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs
637: . -pc_h2opus_mrtol - relative tolerance for construction from sampling
638: - -pc_h2opus_forcecpu - force construction of preconditioner on CPU
640: Level: intermediate
642: .seealso: `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()`
643: M*/
645: PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
646: {
647: PC_H2OPUS *pch2opus;
649: PetscNew(&pch2opus);
650: pc->data = (void *)pch2opus;
652: pch2opus->atol = 1.e-2;
653: pch2opus->rtol = 1.e-6;
654: pch2opus->maxits = 50;
655: pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */
656: pch2opus->normtype = NORM_2;
658: /* these are needed when we are sampling the pmat */
659: pch2opus->eta = PETSC_DECIDE;
660: pch2opus->leafsize = PETSC_DECIDE;
661: pch2opus->max_rank = PETSC_DECIDE;
662: pch2opus->bs = PETSC_DECIDE;
663: pch2opus->mrtol = PETSC_DECIDE;
664: #if defined(PETSC_H2OPUS_USE_GPU)
665: pch2opus->boundtocpu = PETSC_FALSE;
666: #else
667: pch2opus->boundtocpu = PETSC_TRUE;
668: #endif
669: pc->ops->destroy = PCDestroy_H2OPUS;
670: pc->ops->setup = PCSetUp_H2OPUS;
671: pc->ops->apply = PCApply_H2OPUS;
672: pc->ops->matapply = PCApplyMat_H2OPUS;
673: pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
674: pc->ops->reset = PCReset_H2OPUS;
675: pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
676: pc->ops->view = PCView_H2OPUS;
678: PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS);
679: return 0;
680: }