Actual source code: pgmres.c
2: /*
3: This file implements PGMRES (a Pipelined Generalized Minimal Residual method)
4: */
6: #include <../src/ksp/ksp/impls/gmres/pgmres/pgmresimpl.h>
7: #define PGMRES_DELTA_DIRECTIONS 10
8: #define PGMRES_DEFAULT_MAXK 30
10: static PetscErrorCode KSPPGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *);
11: static PetscErrorCode KSPPGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
13: /*
15: KSPSetUp_PGMRES - Sets up the workspace needed by pgmres.
17: This is called once, usually automatically by KSPSolve() or KSPSetUp(),
18: but can be called directly by KSPSetUp().
20: */
21: static PetscErrorCode KSPSetUp_PGMRES(KSP ksp)
22: {
23: KSPSetUp_GMRES(ksp);
24: return 0;
25: }
27: /*
29: KSPPGMRESCycle - Run pgmres, possibly with restart. Return residual
30: history if requested.
32: input parameters:
33: . pgmres - structure containing parameters and work areas
35: output parameters:
36: . itcount - number of iterations used. If null, ignored.
37: . converged - 0 if not converged
39: Notes:
40: On entry, the value in vector VEC_VV(0) should be
41: the initial residual.
43: */
44: static PetscErrorCode KSPPGMRESCycle(PetscInt *itcount, KSP ksp)
45: {
46: KSP_PGMRES *pgmres = (KSP_PGMRES *)(ksp->data);
47: PetscReal res_norm, res, newnorm;
48: PetscInt it = 0, j, k;
49: PetscBool hapend = PETSC_FALSE;
51: if (itcount) *itcount = 0;
52: VecNormalize(VEC_VV(0), &res_norm);
53: KSPCheckNorm(ksp, res_norm);
54: res = res_norm;
55: *RS(0) = res_norm;
57: /* check for the convergence */
58: PetscObjectSAWsTakeAccess((PetscObject)ksp);
59: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
60: else ksp->rnorm = 0;
61: PetscObjectSAWsGrantAccess((PetscObject)ksp);
62: pgmres->it = it - 2;
63: KSPLogResidualHistory(ksp, ksp->rnorm);
64: KSPMonitor(ksp, ksp->its, ksp->rnorm);
65: if (!res) {
66: ksp->reason = KSP_CONVERGED_ATOL;
67: PetscInfo(ksp, "Converged due to zero residual norm on entry\n");
68: return 0;
69: }
71: (*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP);
72: for (; !ksp->reason; it++) {
73: Vec Zcur, Znext;
74: if (pgmres->vv_allocated <= it + VEC_OFFSET + 1) KSPGMRESGetNewVectors(ksp, it + 1);
75: /* VEC_VV(it-1) is orthogonal, it will be normalized once the VecNorm arrives. */
76: Zcur = VEC_VV(it); /* Zcur is not yet orthogonal, but the VecMDot to orthogonalize it has been started. */
77: Znext = VEC_VV(it + 1); /* This iteration will compute Znext, update with a deferred correction once we know how
78: * Zcur relates to the previous vectors, and start the reduction to orthogonalize it. */
80: if (it < pgmres->max_k + 1 && ksp->its + 1 < PetscMax(2, ksp->max_it)) { /* We don't know whether what we have computed is enough, so apply the matrix. */
81: KSP_PCApplyBAorAB(ksp, Zcur, Znext, VEC_TEMP_MATOP);
82: }
84: if (it > 1) { /* Complete the pending reduction */
85: VecNormEnd(VEC_VV(it - 1), NORM_2, &newnorm);
86: *HH(it - 1, it - 2) = newnorm;
87: }
88: if (it > 0) { /* Finish the reduction computing the latest column of H */
89: VecMDotEnd(Zcur, it, &(VEC_VV(0)), HH(0, it - 1));
90: }
92: if (it > 1) {
93: /* normalize the base vector from two iterations ago, basis is complete up to here */
94: VecScale(VEC_VV(it - 1), 1. / *HH(it - 1, it - 2));
96: KSPPGMRESUpdateHessenberg(ksp, it - 2, &hapend, &res);
97: pgmres->it = it - 2;
98: ksp->its++;
99: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
100: else ksp->rnorm = 0;
102: (*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP);
103: if (it < pgmres->max_k + 1 || ksp->reason || ksp->its == ksp->max_it) { /* Monitor if we are done or still iterating, but not before a restart. */
104: KSPLogResidualHistory(ksp, ksp->rnorm);
105: KSPMonitor(ksp, ksp->its, ksp->rnorm);
106: }
107: if (ksp->reason) break;
108: /* Catch error in happy breakdown and signal convergence and break from loop */
109: if (hapend) {
111: ksp->reason = KSP_DIVERGED_BREAKDOWN;
112: break;
113: }
115: if (!(it < pgmres->max_k + 1 && ksp->its < ksp->max_it)) break;
117: /* The it-2 column of H was not scaled when we computed Zcur, apply correction */
118: VecScale(Zcur, 1. / *HH(it - 1, it - 2));
119: /* And Znext computed in this iteration was computed using the under-scaled Zcur */
120: VecScale(Znext, 1. / *HH(it - 1, it - 2));
122: /* In the previous iteration, we projected an unnormalized Zcur against the Krylov basis, so we need to fix the column of H resulting from that projection. */
123: for (k = 0; k < it; k++) *HH(k, it - 1) /= *HH(it - 1, it - 2);
124: /* When Zcur was projected against the Krylov basis, VV(it-1) was still not normalized, so fix that too. This
125: * column is complete except for HH(it,it-1) which we won't know until the next iteration. */
126: *HH(it - 1, it - 1) /= *HH(it - 1, it - 2);
127: }
129: if (it > 0) {
130: PetscScalar *work;
131: if (!pgmres->orthogwork) PetscMalloc1(pgmres->max_k + 2, &pgmres->orthogwork);
132: work = pgmres->orthogwork;
133: /* Apply correction computed by the VecMDot in the last iteration to Znext. The original form is
134: *
135: * Znext -= sum_{j=0}^{i-1} Z[j+1] * H[j,i-1]
136: *
137: * where
138: *
139: * Z[j] = sum_{k=0}^j V[k] * H[k,j-1]
140: *
141: * substituting
142: *
143: * Znext -= sum_{j=0}^{i-1} sum_{k=0}^{j+1} V[k] * H[k,j] * H[j,i-1]
144: *
145: * rearranging the iteration space from row-column to column-row
146: *
147: * Znext -= sum_{k=0}^i sum_{j=k-1}^{i-1} V[k] * H[k,j] * H[j,i-1]
148: *
149: * Note that column it-1 of HH is correct. For all previous columns, we must look at HES because HH has already
150: * been transformed to upper triangular form.
151: */
152: for (k = 0; k < it + 1; k++) {
153: work[k] = 0;
154: for (j = PetscMax(0, k - 1); j < it - 1; j++) work[k] -= *HES(k, j) * *HH(j, it - 1);
155: }
156: VecMAXPY(Znext, it + 1, work, &VEC_VV(0));
157: VecAXPY(Znext, -*HH(it - 1, it - 1), Zcur);
159: /* Orthogonalize Zcur against existing basis vectors. */
160: for (k = 0; k < it; k++) work[k] = -*HH(k, it - 1);
161: VecMAXPY(Zcur, it, work, &VEC_VV(0));
162: /* Zcur is now orthogonal, and will be referred to as VEC_VV(it) again, though it is still not normalized. */
163: /* Begin computing the norm of the new vector, will be normalized after the MatMult in the next iteration. */
164: VecNormBegin(VEC_VV(it), NORM_2, &newnorm);
165: }
167: /* Compute column of H (to the diagonal, but not the subdiagonal) to be able to orthogonalize the newest vector. */
168: VecMDotBegin(Znext, it + 1, &VEC_VV(0), HH(0, it));
170: /* Start an asynchronous split-mode reduction, the result of the MDot and Norm will be collected on the next iteration. */
171: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Znext));
172: }
174: if (itcount) *itcount = it - 1; /* Number of iterations actually completed. */
176: /*
177: Down here we have to solve for the "best" coefficients of the Krylov
178: columns, add the solution values together, and possibly unwind the
179: preconditioning from the solution
180: */
181: /* Form the solution (or the solution so far) */
182: KSPPGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, it - 2);
183: return 0;
184: }
186: /*
187: KSPSolve_PGMRES - This routine applies the PGMRES method.
189: Input Parameter:
190: . ksp - the Krylov space object that was set to use pgmres
192: Output Parameter:
193: . outits - number of iterations used
195: */
196: static PetscErrorCode KSPSolve_PGMRES(KSP ksp)
197: {
198: PetscInt its, itcount;
199: KSP_PGMRES *pgmres = (KSP_PGMRES *)ksp->data;
200: PetscBool guess_zero = ksp->guess_zero;
203: PetscObjectSAWsTakeAccess((PetscObject)ksp);
204: ksp->its = 0;
205: PetscObjectSAWsGrantAccess((PetscObject)ksp);
207: itcount = 0;
208: ksp->reason = KSP_CONVERGED_ITERATING;
209: while (!ksp->reason) {
210: KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs);
211: KSPPGMRESCycle(&its, ksp);
212: itcount += its;
213: if (itcount >= ksp->max_it) {
214: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
215: break;
216: }
217: ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
218: }
219: ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
220: return 0;
221: }
223: static PetscErrorCode KSPDestroy_PGMRES(KSP ksp)
224: {
225: KSPDestroy_GMRES(ksp);
226: return 0;
227: }
229: /*
230: KSPPGMRESBuildSoln - create the solution from the starting vector and the
231: current iterates.
233: Input parameters:
234: nrs - work area of size it + 1.
235: vguess - index of initial guess
236: vdest - index of result. Note that vguess may == vdest (replace
237: guess with the solution).
238: it - HH upper triangular part is a block of size (it+1) x (it+1)
240: This is an internal routine that knows about the PGMRES internals.
241: */
242: static PetscErrorCode KSPPGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it)
243: {
244: PetscScalar tt;
245: PetscInt k, j;
246: KSP_PGMRES *pgmres = (KSP_PGMRES *)(ksp->data);
248: /* Solve for solution vector that minimizes the residual */
250: if (it < 0) { /* no pgmres steps have been performed */
251: VecCopy(vguess, vdest); /* VecCopy() is smart, exits immediately if vguess == vdest */
252: return 0;
253: }
255: /* solve the upper triangular system - RS is the right side and HH is
256: the upper triangular matrix - put soln in nrs */
257: if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it);
258: else nrs[it] = 0.0;
260: for (k = it - 1; k >= 0; k--) {
261: tt = *RS(k);
262: for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j];
263: nrs[k] = tt / *HH(k, k);
264: }
266: /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
267: VecZeroEntries(VEC_TEMP);
268: VecMAXPY(VEC_TEMP, it + 1, nrs, &VEC_VV(0));
269: KSPUnwindPreconditioner(ksp, VEC_TEMP, VEC_TEMP_MATOP);
270: /* add solution to previous solution */
271: if (vdest == vguess) {
272: VecAXPY(vdest, 1.0, VEC_TEMP);
273: } else {
274: VecWAXPY(vdest, 1.0, VEC_TEMP, vguess);
275: }
276: return 0;
277: }
279: /*
281: KSPPGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
282: Return new residual.
284: input parameters:
286: . ksp - Krylov space object
287: . it - plane rotations are applied to the (it+1)th column of the
288: modified hessenberg (i.e. HH(:,it))
289: . hapend - PETSC_FALSE not happy breakdown ending.
291: output parameters:
292: . res - the new residual
294: */
295: /*
296: . it - column of the Hessenberg that is complete, PGMRES is actually computing two columns ahead of this
297: */
298: static PetscErrorCode KSPPGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res)
299: {
300: PetscScalar *hh, *cc, *ss, *rs;
301: PetscInt j;
302: PetscReal hapbnd;
303: KSP_PGMRES *pgmres = (KSP_PGMRES *)(ksp->data);
305: hh = HH(0, it); /* pointer to beginning of column to update */
306: cc = CC(0); /* beginning of cosine rotations */
307: ss = SS(0); /* beginning of sine rotations */
308: rs = RS(0); /* right hand side of least squares system */
310: /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
311: for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j];
313: /* check for the happy breakdown */
314: hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pgmres->haptol);
315: if (PetscAbsScalar(hh[it + 1]) < hapbnd) {
316: PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it)));
317: *hapend = PETSC_TRUE;
318: }
320: /* Apply all the previously computed plane rotations to the new column
321: of the Hessenberg matrix */
322: /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta),
323: and some refs have [c s ; -conj(s) c] (don't be confused!) */
325: for (j = 0; j < it; j++) {
326: PetscScalar hhj = hh[j];
327: hh[j] = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1];
328: hh[j + 1] = -ss[j] * hhj + cc[j] * hh[j + 1];
329: }
331: /*
332: compute the new plane rotation, and apply it to:
333: 1) the right-hand-side of the Hessenberg system (RS)
334: note: it affects RS(it) and RS(it+1)
335: 2) the new column of the Hessenberg matrix
336: note: it affects HH(it,it) which is currently pointed to
337: by hh and HH(it+1, it) (*(hh+1))
338: thus obtaining the updated value of the residual...
339: */
341: /* compute new plane rotation */
343: if (!*hapend) {
344: PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1])));
345: if (delta == 0.0) {
346: ksp->reason = KSP_DIVERGED_NULL;
347: return 0;
348: }
350: cc[it] = hh[it] / delta; /* new cosine value */
351: ss[it] = hh[it + 1] / delta; /* new sine value */
353: hh[it] = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1];
354: rs[it + 1] = -ss[it] * rs[it];
355: rs[it] = PetscConj(cc[it]) * rs[it];
356: *res = PetscAbsScalar(rs[it + 1]);
357: } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
358: another rotation matrix (so RH doesn't change). The new residual is
359: always the new sine term times the residual from last time (RS(it)),
360: but now the new sine rotation would be zero...so the residual should
361: be zero...so we will multiply "zero" by the last residual. This might
362: not be exactly what we want to do here -could just return "zero". */
364: *res = 0.0;
365: }
366: return 0;
367: }
369: /*
370: KSPBuildSolution_PGMRES
372: Input Parameter:
373: . ksp - the Krylov space object
374: . ptr-
376: Output Parameter:
377: . result - the solution
379: Note: this calls KSPPGMRESBuildSoln - the same function that KSPPGMRESCycle
380: calls directly.
382: */
383: PetscErrorCode KSPBuildSolution_PGMRES(KSP ksp, Vec ptr, Vec *result)
384: {
385: KSP_PGMRES *pgmres = (KSP_PGMRES *)ksp->data;
387: if (!ptr) {
388: if (!pgmres->sol_temp) { VecDuplicate(ksp->vec_sol, &pgmres->sol_temp); }
389: ptr = pgmres->sol_temp;
390: }
391: if (!pgmres->nrs) {
392: /* allocate the work area */
393: PetscMalloc1(pgmres->max_k, &pgmres->nrs);
394: }
396: KSPPGMRESBuildSoln(pgmres->nrs, ksp->vec_sol, ptr, ksp, pgmres->it);
397: if (result) *result = ptr;
398: return 0;
399: }
401: PetscErrorCode KSPSetFromOptions_PGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject)
402: {
403: KSPSetFromOptions_GMRES(ksp, PetscOptionsObject);
404: PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined GMRES Options");
405: PetscOptionsHeadEnd();
406: return 0;
407: }
409: PetscErrorCode KSPReset_PGMRES(KSP ksp)
410: {
411: KSPReset_GMRES(ksp);
412: return 0;
413: }
415: /*MC
416: KSPPGMRES - Implements the Pipelined Generalized Minimal Residual method. [](sec_pipelineksp)
418: Options Database Keys:
419: + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
420: . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
421: . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
422: vectors are allocated as needed)
423: . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
424: . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
425: . -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
426: stability of the classical Gram-Schmidt orthogonalization.
427: - -ksp_gmres_krylov_monitor - plot the Krylov space generated
429: Level: beginner
431: Note:
432: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
433: See [](doc_faq_pipelined)
435: Reference:
436: Ghysels, Ashby, Meerbergen, Vanroose, Hiding global communication latencies in the GMRES algorithm on massively parallel machines, 2012.
438: Developer Note:
439: This object is subclassed off of `KSPGMRES`
441: .seealso: [](chapter_ksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPGMRES`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`,
442: `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESSetOrthogonalization()`, `KSPGMRESGetOrthogonalization()`,
443: `KSPGMRESClassicalGramSchmidtOrthogonalization()`, `KSPGMRESModifiedGramSchmidtOrthogonalization()`,
444: `KSPGMRESCGSRefinementType`, `KSPGMRESSetCGSRefinementType()`, `KSPGMRESGetCGSRefinementType()`, `KSPGMRESMonitorKrylov()`
445: M*/
447: PETSC_EXTERN PetscErrorCode KSPCreate_PGMRES(KSP ksp)
448: {
449: KSP_PGMRES *pgmres;
451: PetscNew(&pgmres);
453: ksp->data = (void *)pgmres;
454: ksp->ops->buildsolution = KSPBuildSolution_PGMRES;
455: ksp->ops->setup = KSPSetUp_PGMRES;
456: ksp->ops->solve = KSPSolve_PGMRES;
457: ksp->ops->reset = KSPReset_PGMRES;
458: ksp->ops->destroy = KSPDestroy_PGMRES;
459: ksp->ops->view = KSPView_GMRES;
460: ksp->ops->setfromoptions = KSPSetFromOptions_PGMRES;
461: ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
462: ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
464: KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3);
465: KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2);
466: KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1);
468: PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES);
469: PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetOrthogonalization_C", KSPGMRESSetOrthogonalization_GMRES);
470: PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetOrthogonalization_C", KSPGMRESGetOrthogonalization_GMRES);
471: PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES);
472: PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES);
473: PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetCGSRefinementType_C", KSPGMRESSetCGSRefinementType_GMRES);
474: PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetCGSRefinementType_C", KSPGMRESGetCGSRefinementType_GMRES);
476: pgmres->nextra_vecs = 1;
477: pgmres->haptol = 1.0e-30;
478: pgmres->q_preallocate = 0;
479: pgmres->delta_allocate = PGMRES_DELTA_DIRECTIONS;
480: pgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
481: pgmres->nrs = NULL;
482: pgmres->sol_temp = NULL;
483: pgmres->max_k = PGMRES_DEFAULT_MAXK;
484: pgmres->Rsvd = NULL;
485: pgmres->orthogwork = NULL;
486: pgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
487: return 0;
488: }