Actual source code: pipecgrr.c


  2: #include <petsc/private/kspimpl.h>

  4: /*
  5:      KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.

  7:       This is called once, usually automatically by KSPSolve() or KSPSetUp()
  8:      but can be called directly by KSPSetUp()
  9: */
 10: static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
 11: {
 12:   /* get work vectors needed by PIPECGRR */
 13:   KSPSetWorkVecs(ksp, 9);
 14:   return 0;
 15: }

 17: /*
 18:  KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
 19: */
 20: static PetscErrorCode KSPSolve_PIPECGRR(KSP ksp)
 21: {
 22:   PetscInt    i = 0, replace = 0, nsize;
 23:   PetscScalar alpha = 0.0, beta = 0.0, gamma = 0.0, gammaold = 0.0, delta = 0.0, alphap = 0.0, betap = 0.0;
 24:   PetscReal   dp = 0.0, nsi = 0.0, sqn = 0.0, Anorm = 0.0, rnp = 0.0, pnp = 0.0, snp = 0.0, unp = 0.0, wnp = 0.0, xnp = 0.0, qnp = 0.0, znp = 0.0, mnz = 5.0, tol = PETSC_SQRT_MACHINE_EPSILON, eps = PETSC_MACHINE_EPSILON;
 25:   PetscReal   ds = 0.0, dz = 0.0, dx = 0.0, dpp = 0.0, dq = 0.0, dm = 0.0, du = 0.0, dw = 0.0, db = 0.0, errr = 0.0, errrprev = 0.0, errs = 0.0, errw = 0.0, errz = 0.0, errncr = 0.0, errncs = 0.0, errncw = 0.0, errncz = 0.0;
 26:   Vec         X, B, Z, P, W, Q, U, M, N, R, S;
 27:   Mat         Amat, Pmat;
 28:   PetscBool   diagonalscale;

 30:   PCGetDiagonalScale(ksp->pc, &diagonalscale);

 33:   X = ksp->vec_sol;
 34:   B = ksp->vec_rhs;
 35:   M = ksp->work[0];
 36:   Z = ksp->work[1];
 37:   P = ksp->work[2];
 38:   N = ksp->work[3];
 39:   W = ksp->work[4];
 40:   Q = ksp->work[5];
 41:   U = ksp->work[6];
 42:   R = ksp->work[7];
 43:   S = ksp->work[8];

 45:   PCGetOperators(ksp->pc, &Amat, &Pmat);

 47:   ksp->its = 0;
 48:   if (!ksp->guess_zero) {
 49:     KSP_MatMult(ksp, Amat, X, R); /*  r <- b - Ax  */
 50:     VecAYPX(R, -1.0, B);
 51:   } else {
 52:     VecCopy(B, R); /*  r <- b (x is 0)  */
 53:   }

 55:   KSP_PCApply(ksp, R, U); /*  u <- Br  */

 57:   switch (ksp->normtype) {
 58:   case KSP_NORM_PRECONDITIONED:
 59:     VecNormBegin(U, NORM_2, &dp); /*  dp <- u'*u = e'*A'*B'*B*A'*e'  */
 60:     VecNormBegin(B, NORM_2, &db);
 61:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
 62:     KSP_MatMult(ksp, Amat, U, W); /*  w <- Au  */
 63:     VecNormEnd(U, NORM_2, &dp);
 64:     VecNormEnd(B, NORM_2, &db);
 65:     break;
 66:   case KSP_NORM_UNPRECONDITIONED:
 67:     VecNormBegin(R, NORM_2, &dp); /*  dp <- r'*r = e'*A'*A*e  */
 68:     VecNormBegin(B, NORM_2, &db);
 69:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 70:     KSP_MatMult(ksp, Amat, U, W); /*  w <- Au  */
 71:     VecNormEnd(R, NORM_2, &dp);
 72:     VecNormEnd(B, NORM_2, &db);
 73:     break;
 74:   case KSP_NORM_NATURAL:
 75:     VecDotBegin(R, U, &gamma); /*  gamma <- u'*r  */
 76:     VecNormBegin(B, NORM_2, &db);
 77:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
 78:     KSP_MatMult(ksp, Amat, U, W); /*  w <- Au  */
 79:     VecDotEnd(R, U, &gamma);
 80:     VecNormEnd(B, NORM_2, &db);
 81:     KSPCheckDot(ksp, gamma);
 82:     dp = PetscSqrtReal(PetscAbsScalar(gamma)); /*  dp <- r'*u = r'*B*r = e'*A'*B*A*e  */
 83:     break;
 84:   case KSP_NORM_NONE:
 85:     KSP_MatMult(ksp, Amat, U, W);
 86:     dp = 0.0;
 87:     break;
 88:   default:
 89:     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
 90:   }
 91:   KSPLogResidualHistory(ksp, dp);
 92:   KSPMonitor(ksp, 0, dp);
 93:   ksp->rnorm = dp;
 94:   (*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP); /*  test for convergence  */
 95:   if (ksp->reason) return 0;

 97:   MatNorm(Amat, NORM_INFINITY, &Anorm);
 98:   VecGetSize(B, &nsize);
 99:   nsi = (PetscReal)nsize;
100:   sqn = PetscSqrtReal(nsi);

102:   do {
103:     if (i > 1) {
104:       pnp = dpp;
105:       snp = ds;
106:       qnp = dq;
107:       znp = dz;
108:     }
109:     if (i > 0) {
110:       rnp    = dp;
111:       unp    = du;
112:       wnp    = dw;
113:       xnp    = dx;
114:       alphap = alpha;
115:       betap  = beta;
116:     }

118:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
119:       VecNormBegin(R, NORM_2, &dp);
120:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
121:       VecNormBegin(U, NORM_2, &dp);
122:     }
123:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) VecDotBegin(R, U, &gamma);
124:     VecDotBegin(W, U, &delta);

126:     if (i > 0) {
127:       VecNormBegin(S, NORM_2, &ds);
128:       VecNormBegin(Z, NORM_2, &dz);
129:       VecNormBegin(P, NORM_2, &dpp);
130:       VecNormBegin(Q, NORM_2, &dq);
131:       VecNormBegin(M, NORM_2, &dm);
132:     }
133:     VecNormBegin(X, NORM_2, &dx);
134:     VecNormBegin(U, NORM_2, &du);
135:     VecNormBegin(W, NORM_2, &dw);

137:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
138:     KSP_PCApply(ksp, W, M);       /*   m <- Bw       */
139:     KSP_MatMult(ksp, Amat, M, N); /*   n <- Am       */

141:     if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
142:       VecNormEnd(R, NORM_2, &dp);
143:     } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
144:       VecNormEnd(U, NORM_2, &dp);
145:     }
146:     if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) VecDotEnd(R, U, &gamma);
147:     VecDotEnd(W, U, &delta);

149:     if (i > 0) {
150:       VecNormEnd(S, NORM_2, &ds);
151:       VecNormEnd(Z, NORM_2, &dz);
152:       VecNormEnd(P, NORM_2, &dpp);
153:       VecNormEnd(Q, NORM_2, &dq);
154:       VecNormEnd(M, NORM_2, &dm);
155:     }
156:     VecNormEnd(X, NORM_2, &dx);
157:     VecNormEnd(U, NORM_2, &du);
158:     VecNormEnd(W, NORM_2, &dw);

160:     if (i > 0) {
161:       if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
162:       else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;

164:       ksp->rnorm = dp;
165:       KSPLogResidualHistory(ksp, dp);
166:       KSPMonitor(ksp, i, dp);
167:       (*ksp->converged)(ksp, i, dp, &ksp->reason, ksp->cnvP);
168:       if (ksp->reason) return 0;
169:     }

171:     if (i == 0) {
172:       alpha = gamma / delta;
173:       VecCopy(N, Z); /*  z <- n  */
174:       VecCopy(M, Q); /*  q <- m  */
175:       VecCopy(U, P); /*  p <- u  */
176:       VecCopy(W, S); /*  s <- w  */
177:     } else {
178:       beta  = gamma / gammaold;
179:       alpha = gamma / (delta - beta / alpha * gamma);
180:       VecAYPX(Z, beta, N); /*  z <- n + beta * z  */
181:       VecAYPX(Q, beta, M); /*  q <- m + beta * q  */
182:       VecAYPX(P, beta, U); /*  p <- u + beta * p  */
183:       VecAYPX(S, beta, W); /*  s <- w + beta * s  */
184:     }
185:     VecAXPY(X, alpha, P);  /*  x <- x + alpha * p  */
186:     VecAXPY(U, -alpha, Q); /*  u <- u - alpha * q  */
187:     VecAXPY(W, -alpha, Z); /*  w <- w - alpha * z  */
188:     VecAXPY(R, -alpha, S); /*  r <- r - alpha * s  */
189:     gammaold = gamma;

191:     if (i > 0) {
192:       errncr = PetscSqrtReal(Anorm * xnp + 2.0 * Anorm * PetscAbsScalar(alphap) * dpp + rnp + 2.0 * PetscAbsScalar(alphap) * ds) * eps;
193:       errncw = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(alphap) * dq + wnp + 2.0 * PetscAbsScalar(alphap) * dz) * eps;
194:     }
195:     if (i > 1) {
196:       errncs = PetscSqrtReal(Anorm * unp + 2.0 * Anorm * PetscAbsScalar(betap) * pnp + wnp + 2.0 * PetscAbsScalar(betap) * snp) * eps;
197:       errncz = PetscSqrtReal((mnz * sqn + 2) * Anorm * dm + 2.0 * Anorm * PetscAbsScalar(betap) * qnp + 2.0 * PetscAbsScalar(betap) * znp) * eps;
198:     }

200:     if (i > 0) {
201:       if (i == 1) {
202:         errr = PetscSqrtReal((mnz * sqn + 1) * Anorm * xnp + db) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dpp) * eps + errncr;
203:         errs = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
204:         errw = PetscSqrtReal(mnz * sqn * Anorm * unp) * eps + PetscSqrtReal(PetscAbsScalar(alphap) * mnz * sqn * Anorm * dq) * eps + errncw;
205:         errz = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
206:       } else if (replace == 1) {
207:         errrprev = errr;
208:         errr     = PetscSqrtReal((mnz * sqn + 1) * Anorm * dx + db) * eps;
209:         errs     = PetscSqrtReal(mnz * sqn * Anorm * dpp) * eps;
210:         errw     = PetscSqrtReal(mnz * sqn * Anorm * du) * eps;
211:         errz     = PetscSqrtReal(mnz * sqn * Anorm * dq) * eps;
212:         replace  = 0;
213:       } else {
214:         errrprev = errr;
215:         errr     = errr + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errs + PetscAbsScalar(alphap) * errw + errncr + PetscAbsScalar(alphap) * errncs;
216:         errs     = errw + PetscAbsScalar(betap) * errs + errncs;
217:         errw     = errw + PetscAbsScalar(alphap) * PetscAbsScalar(betap) * errz + errncw + PetscAbsScalar(alphap) * errncz;
218:         errz     = PetscAbsScalar(betap) * errz + errncz;
219:       }
220:       if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
221:         KSP_MatMult(ksp, Amat, X, R); /*  r <- Ax - b  */
222:         VecAYPX(R, -1.0, B);
223:         KSP_PCApply(ksp, R, U);       /*  u <- Br  */
224:         KSP_MatMult(ksp, Amat, U, W); /*  w <- Au  */
225:         KSP_MatMult(ksp, Amat, P, S); /*  s <- Ap  */
226:         KSP_PCApply(ksp, S, Q);       /*  q <- Bs  */
227:         KSP_MatMult(ksp, Amat, Q, Z); /*  z <- Aq  */
228:         replace = 1;
229:       }
230:     }

232:     i++;
233:     ksp->its = i;

235:   } while (i <= ksp->max_it);
236:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
237:   return 0;
238: }

240: /*MC
241:    KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements. [](sec_pipelineksp)

243:    Level: intermediate

245:    Notes:
246:    This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard `KSPCG`.  The
247:    non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.

249:    `KSPPIPECGRR` improves the robustness of `KSPPIPECG` by adding an automated residual replacement strategy.
250:    True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
251:    iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.

253:    See also `KSPPIPECG`, which is identical to `KSPPIPECGRR` without residual replacements.
254:    See also `KSPPIPECR`, where the reduction is only overlapped with the matrix-vector product.

256:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
257:    performance of pipelined methods. See [](doc_faq_pipelined)

259:    Contributed by:
260:    Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
261:    European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)

263:    Reference:
264:    S. Cools, E.F. Yetkin, E. Agullo, L. Giraud, W. Vanroose, "Analyzing the effect of local rounding error
265:    propagation on the maximal attainable accuracy of the pipelined Conjugate Gradients method",
266:    SIAM Journal on Matrix Analysis and Applications (SIMAX), 39(1):426--450, 2018.

268: .seealso: [](chapter_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), `KSPCreate()`, `KSPSetType()`, `KSPPIPECR`, `KSPGROPPCG`, `KSPPIPECG`, `KSPPGMRES`, `KSPCG`, `KSPPIPEBCGS`, `KSPCGUseSingleReduction()`
269: M*/
270: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
271: {
272:   KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 2);
273:   KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2);
274:   KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2);
275:   KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1);

277:   ksp->ops->setup          = KSPSetUp_PIPECGRR;
278:   ksp->ops->solve          = KSPSolve_PIPECGRR;
279:   ksp->ops->destroy        = KSPDestroyDefault;
280:   ksp->ops->view           = NULL;
281:   ksp->ops->setfromoptions = NULL;
282:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
283:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
284:   return 0;
285: }