Actual source code: pipecgrr.c
2: #include <petsc/private/kspimpl.h>
4: /*
5: KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.
7: This is called once, usually automatically by KSPSolve() or KSPSetUp()
8: but can be called directly by KSPSetUp()
9: */
10: static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
11: {
12: /* get work vectors needed by PIPECGRR */
13: KSPSetWorkVecs(ksp, 9);
14: return 0;
15: }
17: /*
18: KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
19: */
20: static PetscErrorCode KSPSolve_PIPECGRR(KSP ksp)
21: {
22: PetscInt i = 0, replace = 0, nsize;
23: PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0, alphap = 0.0, betap = 0.0;
24: PetscReal dp = 0.0, nsi = 0.0, sqn = 0.0, Anorm = 0.0, rnp = 0.0, pnp = 0.0, snp = 0.0, unp = 0.0, wnp = 0.0, xnp = 0.0, qnp = 0.0, znp = 0.0, mnz = 5.0, tol = PETSC_SQRT_MACHINE_EPSILON, eps = PETSC_MACHINE_EPSILON;
25: PetscReal ds = 0.0, dz = 0.0, dx = 0.0, dpp = 0.0, dq = 0.0, dm = 0.0, du = 0.0, dw = 0.0, db = 0.0, errr = 0.0, errrprev = 0.0, errs = 0.0, errw = 0.0, errz = 0.0, errncr = 0.0, errncs = 0.0, errncw = 0.0, errncz = 0.0;
26: Vec X, B, Z, P, W, Q, U, M, N, R, S;
27: Mat Amat, Pmat;
28: PetscBool diagonalscale;
30: PCGetDiagonalScale(ksp->pc, &diagonalscale);
33: X = ksp->vec_sol;
34: B = ksp->vec_rhs;
35: M = ksp->work[0];
36: Z = ksp->work[1];
37: P = ksp->work[2];
38: N = ksp->work[3];
39: W = ksp->work[4];
40: Q = ksp->work[5];
41: U = ksp->work[6];
42: R = ksp->work[7];
43: S = ksp->work[8];
45: PCGetOperators(ksp->pc, &Amat, &Pmat);
47: ksp->its = 0;
48: if (!ksp->guess_zero) {
49: KSP_MatMult(ksp, Amat, X, R); /* r <- b - Ax */
50: VecAYPX(R, -1.0, B);
51: } else {
52: VecCopy(B, R); /* r <- b (x is 0) */
53: }
55: KSP_PCApply(ksp, R, U); /* u <- Br */
57: switch (ksp->normtype) {
58: case KSP_NORM_PRECONDITIONED:
59: VecNormBegin(U, NORM_2, &dp); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
60: VecNormBegin(B, NORM_2, &db);
61: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
62: KSP_MatMult(ksp, Amat, U, W); /* w <- Au */
63: VecNormEnd(U, NORM_2, &dp);
64: VecNormEnd(B, NORM_2, &db);
65: break;
66: case KSP_NORM_UNPRECONDITIONED:
67: VecNormBegin(R, NORM_2, &dp); /* dp <- r'*r = e'*A'*A*e */
68: VecNormBegin(B, NORM_2, &db);
69: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
70: KSP_MatMult(ksp, Amat, U, W); /* w <- Au */
71: VecNormEnd(R, NORM_2, &dp);
72: VecNormEnd(B, NORM_2, &db);
73: break;
74: case KSP_NORM_NATURAL:
75: VecDotBegin(R, U, &gamma); /* gamma <- u'*r */
76: VecNormBegin(B, NORM_2, &db);
77: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
78: KSP_MatMult(ksp, Amat, U, W); /* w <- Au */
79: VecDotEnd(R, U, &gamma);
80: VecNormEnd(B, NORM_2, &db);
81: KSPCheckDot(ksp, gamma);
82: dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*u = r'*B*r = e'*A'*B*A*e */
83: break;
84: case KSP_NORM_NONE:
85: KSP_MatMult(ksp, Amat, U, W);
86: dp = 0.0;
87: break;
88: default:
89: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
90: }
91: KSPLogResidualHistory(ksp, dp);
92: KSPMonitor(ksp, 0, dp);
93: ksp->rnorm = dp;
94: (*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP); /* test for convergence */
95: if (ksp->reason) return 0;
97: MatNorm(Amat, NORM_INFINITY, &Anorm);
98: VecGetSize(B, &nsize);
99: nsi = (PetscReal)nsize;
100: sqn = PetscSqrtReal(nsi);
102: do {
103: if (i > 1) {
104: pnp = dpp;
105: snp = ds;
106: qnp = dq;
107: znp = dz;
108: }
109: if (i > 0) {
110: rnp = dp;
111: unp = du;
112: wnp = dw;
113: xnp = dx;
114: alphap = alpha;
115: betap = beta;
116: }
118: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
119: VecNormBegin(R, NORM_2, &dp);
120: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
121: VecNormBegin(U, NORM_2, &dp);
122: }
123: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) VecDotBegin(R, U, &gamma);
124: VecDotBegin(W, U, &delta);
126: if (i > 0) {
127: VecNormBegin(S, NORM_2, &ds);
128: VecNormBegin(Z, NORM_2, &dz);
129: VecNormBegin(P, NORM_2, &dpp);
130: VecNormBegin(Q, NORM_2, &dq);
131: VecNormBegin(M, NORM_2, &dm);
132: }
133: VecNormBegin(X, NORM_2, &dx);
134: VecNormBegin(U, NORM_2, &du);
135: VecNormBegin(W, NORM_2, &dw);
137: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
138: KSP_PCApply(ksp, W, M); /* m <- Bw */
139: KSP_MatMult(ksp, Amat, M, N); /* n <- Am */
141: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
142: VecNormEnd(R, NORM_2, &dp);
143: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
144: VecNormEnd(U, NORM_2, &dp);
145: }
146: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) VecDotEnd(R, U, &gamma);
147: VecDotEnd(W, U, &delta);
149: if (i > 0) {
150: VecNormEnd(S, NORM_2, &ds);
151: VecNormEnd(Z, NORM_2, &dz);
152: VecNormEnd(P, NORM_2, &dpp);
153: VecNormEnd(Q, NORM_2, &dq);
154: VecNormEnd(M, NORM_2, &dm);
155: }
156: VecNormEnd(X, NORM_2, &dx);
157: VecNormEnd(U, NORM_2, &du);
158: VecNormEnd(W, NORM_2, &dw);
160: if (i > 0) {
161: if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
162: else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
164: ksp->rnorm = dp;
165: KSPLogResidualHistory(ksp, dp);
166: KSPMonitor(ksp, i, dp);
167: (*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP);
168: if (ksp->reason) return 0;
169: }
171: if (i == 0) {
172: alpha = gamma / delta;
173: VecCopy(N, Z); /* z <- n */
174: VecCopy(M, Q); /* q <- m */
175: VecCopy(U, P); /* p <- u */
176: VecCopy(W, S); /* s <- w */
177: } else {
178: beta = gamma / gammaold;
179: alpha = gamma / (delta - beta / alpha * gamma);
180: VecAYPX(Z, beta, N); /* z <- n + beta * z */
181: VecAYPX(Q, beta, M); /* q <- m + beta * q */
182: VecAYPX(P, beta, U); /* p <- u + beta * p */
183: VecAYPX(S, beta, W); /* s <- w + beta * s */
184: }
185: VecAXPY(X, alpha, P); /* x <- x + alpha * p */
186: VecAXPY(U, -alpha, Q); /* u <- u - alpha * q */
187: VecAXPY(W, -alpha, Z); /* w <- w - alpha * z */
188: VecAXPY(R, -alpha, S); /* r <- r - alpha * s */
189: gammaold = gamma;
191: if (i > 0) {
192: errncr = PetscSqrtReal(Anorm * xnp + 2.0 * Anorm * PetscAbsScalar(alphap) * dpp + rnp + 2.0 * PetscAbsScalar(alphap) * ds) * eps;
193: errncw = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(alphap) * dq + wnp + 2.0 * PetscAbsScalar(alphap) * dz) * eps;
194: }
195: if (i > 1) {
196: errncs = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(betap) * pnp + wnp + 2.0 * PetscAbsScalar(betap) * snp) * eps;
197: errncz = PetscSqrtReal((mnz * sqn + 2) * Anorm * dm + 2.0 * Anorm * PetscAbsScalar(betap) * qnp + 2.0 * PetscAbsScalar(betap) * znp) * eps;
198: }
200: if (i > 0) {
201: if (i == 1) {
202: errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * xnp + db) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dpp) * eps + errncr;
203: errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
204: errw = PetscSqrtReal(mnz * sqn * Anorm * unp) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dq) * eps + errncw;
205: errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
206: } else if (replace == 1) {
207: errrprev = errr;
208: errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * dx + db) * eps;
209: errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
210: errw = PetscSqrtReal(mnz * sqn * Anorm * du) * eps;
211: errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
212: replace = 0;
213: } else {
214: errrprev = errr;
215: errr = errr + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errs + PetscAbsScalar(alphap) * errw + errncr + PetscAbsScalar(alphap) * errncs;
216: errs = errw + PetscAbsScalar(betap) * errs + errncs;
217: errw = errw + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errz + errncw + PetscAbsScalar(alphap) * errncz;
218: errz = PetscAbsScalar(betap) * errz + errncz;
219: }
220: if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
221: KSP_MatMult(ksp, Amat, X, R); /* r <- Ax - b */
222: VecAYPX(R, -1.0, B);
223: KSP_PCApply(ksp, R, U); /* u <- Br */
224: KSP_MatMult(ksp, Amat, U, W); /* w <- Au */
225: KSP_MatMult(ksp, Amat, P, S); /* s <- Ap */
226: KSP_PCApply(ksp, S, Q); /* q <- Bs */
227: KSP_MatMult(ksp, Amat, Q, Z); /* z <- Aq */
228: replace = 1;
229: }
230: }
232: i++;
233: ksp->its = i;
235: } while (i <= ksp->max_it);
236: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
237: return 0;
238: }
240: /*MC
241: KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements. [](sec_pipelineksp)
243: Level: intermediate
245: Notes:
246: This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCG`. The
247: non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
249: `KSPPIPECGRR` improves the robustness of `KSPPIPECG` by adding an automated residual replacement strategy.
250: True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
251: iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.
253: See also `KSPPIPECG`, which is identical to `KSPPIPECGRR` without residual replacements.
254: See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product.
256: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
257: performance of pipelined methods. See [](doc_faq_pipelined)
259: Contributed by:
260: Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
261: European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)
263: Reference:
264: S. Cools, E.F. Yetkin, E. Agullo, L. Giraud, W. Vanroose, "Analyzing the effect of local rounding error
265: propagation on the maximal attainable accuracy of the pipelined Conjugate Gradients method",
266: SIAM Journal on Matrix Analysis and Applications (SIMAX), 39(1):426--450, 2018.
268: .seealso: [](chapter_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPIPECG`, `KSPPGMRES`, `KSPCG`, `KSPPIPEBCGS`, `KSPCGUseSingleReduction()`
269: M*/
270: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
271: {
272: KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2);
273: KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2);
274: KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2);
275: KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1);
277: ksp->ops->setup = KSPSetUp_PIPECGRR;
278: ksp->ops->solve = KSPSolve_PIPECGRR;
279: ksp->ops->destroy = KSPDestroyDefault;
280: ksp->ops->view = NULL;
281: ksp->ops->setfromoptions = NULL;
282: ksp->ops->buildsolution = KSPBuildSolutionDefault;
283: ksp->ops->buildresidual = KSPBuildResidualDefault;
284: return 0;
285: }