Actual source code: dgmres.c

  1: /*
  2:     Implements deflated GMRES.
  3: */

  5: #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h>

  7: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;

  9: #define GMRES_DELTA_DIRECTIONS 10
 10: #define GMRES_DEFAULT_MAXK     30
 11: static PetscErrorCode KSPDGMRESGetNewVectors(KSP, PetscInt);
 12: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP, PetscInt, PetscBool, PetscReal *);
 13: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);

 15: PetscErrorCode KSPDGMRESSetEigen(KSP ksp, PetscInt nb_eig)
 16: {
 17:   PetscTryMethod((ksp), "KSPDGMRESSetEigen_C", (KSP, PetscInt), (ksp, nb_eig));
 18:   return 0;
 19: }
 20: PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp, PetscInt max_neig)
 21: {
 22:   PetscTryMethod((ksp), "KSPDGMRESSetMaxEigen_C", (KSP, PetscInt), (ksp, max_neig));
 23:   return 0;
 24: }
 25: PetscErrorCode KSPDGMRESForce(KSP ksp, PetscBool force)
 26: {
 27:   PetscTryMethod((ksp), "KSPDGMRESForce_C", (KSP, PetscBool), (ksp, force));
 28:   return 0;
 29: }
 30: PetscErrorCode KSPDGMRESSetRatio(KSP ksp, PetscReal ratio)
 31: {
 32:   PetscTryMethod((ksp), "KSPDGMRESSetRatio_C", (KSP, PetscReal), (ksp, ratio));
 33:   return 0;
 34: }
 35: PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp, PetscInt *neig)
 36: {
 37:   PetscUseMethod((ksp), "KSPDGMRESComputeSchurForm_C", (KSP, PetscInt *), (ksp, neig));
 38:   return 0;
 39: }
 40: PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp, PetscInt *curneigh)
 41: {
 42:   PetscUseMethod((ksp), "KSPDGMRESComputeDeflationData_C", (KSP, PetscInt *), (ksp, curneigh));
 43:   return 0;
 44: }
 45: PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
 46: {
 47:   PetscUseMethod((ksp), "KSPDGMRESApplyDeflation_C", (KSP, Vec, Vec), (ksp, x, y));
 48:   return 0;
 49: }

 51: PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
 52: {
 53:   PetscUseMethod((ksp), "KSPDGMRESImproveEig_C", (KSP, PetscInt), (ksp, neig));
 54:   return 0;
 55: }

 57: PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
 58: {
 59:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
 60:   PetscInt    neig   = dgmres->neig + EIG_OFFSET;
 61:   PetscInt    max_k  = dgmres->max_k + 1;

 63:   KSPSetUp_GMRES(ksp);
 64:   if (!dgmres->neig) return 0;

 66:   /* Allocate workspace for the Schur vectors*/
 67:   PetscMalloc1(neig * max_k, &SR);
 68:   dgmres->wr    = NULL;
 69:   dgmres->wi    = NULL;
 70:   dgmres->perm  = NULL;
 71:   dgmres->modul = NULL;
 72:   dgmres->Q     = NULL;
 73:   dgmres->Z     = NULL;

 75:   UU   = NULL;
 76:   XX   = NULL;
 77:   MX   = NULL;
 78:   AUU  = NULL;
 79:   XMX  = NULL;
 80:   XMU  = NULL;
 81:   UMX  = NULL;
 82:   AUAU = NULL;
 83:   TT   = NULL;
 84:   TTF  = NULL;
 85:   INVP = NULL;
 86:   X1   = NULL;
 87:   X2   = NULL;
 88:   MU   = NULL;
 89:   return 0;
 90: }

 92: /*
 93:  Run GMRES, possibly with restart.  Return residual history if requested.
 94:  input parameters:

 96:  .       gmres  - structure containing parameters and work areas

 98:  output parameters:
 99:  .        nres    - residuals (from preconditioned system) at each step.
100:  If restarting, consider passing nres+it.  If null,
101:  ignored
102:  .        itcount - number of iterations used.  nres[0] to nres[itcount]
103:  are defined.  If null, ignored.

105:  Notes:
106:  On entry, the value in vector VEC_VV(0) should be the initial residual
107:  (this allows shortcuts where the initial preconditioned residual is 0).
108:  */
109: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount, KSP ksp)
110: {
111:   KSP_DGMRES *dgmres = (KSP_DGMRES *)(ksp->data);
112:   PetscReal   res_norm, res, hapbnd, tt;
113:   PetscInt    it     = 0;
114:   PetscInt    max_k  = dgmres->max_k;
115:   PetscBool   hapend = PETSC_FALSE;
116:   PetscReal   res_old;
117:   PetscInt    test = 0;

119:   VecNormalize(VEC_VV(0), &res_norm);
120:   KSPCheckNorm(ksp, res_norm);
121:   res     = res_norm;
122:   *GRS(0) = res_norm;

124:   /* check for the convergence */
125:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
126:   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
127:   else ksp->rnorm = 0.0;
128:   PetscObjectSAWsGrantAccess((PetscObject)ksp);
129:   dgmres->it = (it - 1);
130:   KSPLogResidualHistory(ksp, ksp->rnorm);
131:   KSPMonitor(ksp, ksp->its, ksp->rnorm);
132:   if (!res) {
133:     if (itcount) *itcount = 0;
134:     ksp->reason = KSP_CONVERGED_ATOL;
135:     PetscInfo(ksp, "Converged due to zero residual norm on entry\n");
136:     return 0;
137:   }
138:   /* record the residual norm to test if deflation is needed */
139:   res_old = res;

141:   (*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP);
142:   while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
143:     if (it) {
144:       KSPLogResidualHistory(ksp, ksp->rnorm);
145:       KSPMonitor(ksp, ksp->its, ksp->rnorm);
146:     }
147:     dgmres->it = (it - 1);
148:     if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) KSPDGMRESGetNewVectors(ksp, it + 1);
149:     if (dgmres->r > 0) {
150:       if (ksp->pc_side == PC_LEFT) {
151:         /* Apply the first preconditioner */
152:         KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_TEMP, VEC_TEMP_MATOP);
153:         /* Then apply Deflation as a preconditioner */
154:         KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1 + it));
155:       } else if (ksp->pc_side == PC_RIGHT) {
156:         KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
157:         KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1 + it), VEC_TEMP_MATOP);
158:       }
159:     } else {
160:       KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_VV(1 + it), VEC_TEMP_MATOP);
161:     }
162:     dgmres->matvecs += 1;
163:     /* update hessenberg matrix and do Gram-Schmidt */
164:     (*dgmres->orthog)(ksp, it);

166:     /* vv(i+1) . vv(i+1) */
167:     VecNormalize(VEC_VV(it + 1), &tt);
168:     /* save the magnitude */
169:     *HH(it + 1, it)  = tt;
170:     *HES(it + 1, it) = tt;

172:     /* check for the happy breakdown */
173:     hapbnd = PetscAbsScalar(tt / *GRS(it));
174:     if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
175:     if (tt < hapbnd) {
176:       PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %g tt = %g\n", (double)hapbnd, (double)tt);
177:       hapend = PETSC_TRUE;
178:     }
179:     KSPDGMRESUpdateHessenberg(ksp, it, hapend, &res);

181:     it++;
182:     dgmres->it = (it - 1); /* For converged */
183:     ksp->its++;
184:     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
185:     else ksp->rnorm = 0.0;
186:     if (ksp->reason) break;

188:     (*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP);

190:     /* Catch error in happy breakdown and signal convergence and break from loop */
191:     if (hapend) {
192:       if (!ksp->reason) {
194:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
195:         break;
196:       }
197:     }
198:   }

200:   /* Monitor if we know that we will not return for a restart */
201:   if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
202:     KSPLogResidualHistory(ksp, ksp->rnorm);
203:     KSPMonitor(ksp, ksp->its, ksp->rnorm);
204:   }
205:   if (itcount) *itcount = it;

207:   /*
208:    Down here we have to solve for the "best" coefficients of the Krylov
209:    columns, add the solution values together, and possibly unwind the
210:    preconditioning from the solution
211:    */
212:   /* Form the solution (or the solution so far) */
213:   KSPDGMRESBuildSoln(GRS(0), ksp->vec_sol, ksp->vec_sol, ksp, it - 1);

215:   /* Compute data for the deflation to be used during the next restart */
216:   if (!ksp->reason && ksp->its < ksp->max_it) {
217:     test = max_k * PetscLogReal(ksp->rtol / res) / PetscLogReal(res / res_old);
218:     /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed  */
219:     if ((test > dgmres->smv * (ksp->max_it - ksp->its)) || dgmres->force) KSPDGMRESComputeDeflationData(ksp, NULL);
220:   }
221:   return 0;
222: }

224: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
225: {
226:   PetscInt    i, its, itcount;
227:   KSP_DGMRES *dgmres     = (KSP_DGMRES *)ksp->data;
228:   PetscBool   guess_zero = ksp->guess_zero;


232:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
233:   ksp->its        = 0;
234:   dgmres->matvecs = 0;
235:   PetscObjectSAWsGrantAccess((PetscObject)ksp);

237:   itcount = 0;
238:   while (!ksp->reason) {
239:     KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs);
240:     if (ksp->pc_side == PC_LEFT) {
241:       dgmres->matvecs += 1;
242:       if (dgmres->r > 0) {
243:         KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
244:         VecCopy(VEC_TEMP, VEC_VV(0));
245:       }
246:     }

248:     KSPDGMRESCycle(&its, ksp);
249:     itcount += its;
250:     if (itcount >= ksp->max_it) {
251:       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
252:       break;
253:     }
254:     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
255:   }
256:   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */

258:   for (i = 0; i < dgmres->r; i++) VecViewFromOptions(UU[i], (PetscObject)ksp, "-ksp_dgmres_view_deflation_vecs");
259:   return 0;
260: }

262: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
263: {
264:   KSP_DGMRES *dgmres   = (KSP_DGMRES *)ksp->data;
265:   PetscInt    neig1    = dgmres->neig + EIG_OFFSET;
266:   PetscInt    max_neig = dgmres->max_neig;

268:   if (dgmres->r) {
269:     VecDestroyVecs(max_neig, &UU);
270:     VecDestroyVecs(max_neig, &MU);
271:     if (XX) {
272:       VecDestroyVecs(neig1, &XX);
273:       VecDestroyVecs(neig1, &MX);
274:     }
275:     PetscFree(TT);
276:     PetscFree(TTF);
277:     PetscFree(INVP);
278:     PetscFree(XMX);
279:     PetscFree(UMX);
280:     PetscFree(XMU);
281:     PetscFree(X1);
282:     PetscFree(X2);
283:     PetscFree(dgmres->work);
284:     PetscFree(dgmres->iwork);
285:     PetscFree(dgmres->wr);
286:     PetscFree(dgmres->wi);
287:     PetscFree(dgmres->modul);
288:     PetscFree(dgmres->Q);
289:     PetscFree(ORTH);
290:     PetscFree(AUAU);
291:     PetscFree(AUU);
292:     PetscFree(SR2);
293:   }
294:   PetscFree(SR);
295:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", NULL);
296:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", NULL);
297:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", NULL);
298:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", NULL);
299:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", NULL);
300:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", NULL);
301:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", NULL);
302:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", NULL);
303:   KSPDestroy_GMRES(ksp);
304:   return 0;
305: }

307: /*
308:  KSPDGMRESBuildSoln - create the solution from the starting vector and the
309:  current iterates.

311:  Input parameters:
312:  nrs - work area of size it + 1.
313:  vs  - index of initial guess
314:  vdest - index of result.  Note that vs may == vdest (replace
315:  guess with the solution).

317:  This is an internal routine that knows about the GMRES internals.
318:  */
319: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs, Vec vs, Vec vdest, KSP ksp, PetscInt it)
320: {
321:   PetscScalar tt;
322:   PetscInt    ii, k, j;
323:   KSP_DGMRES *dgmres = (KSP_DGMRES *)(ksp->data);

325:   /* Solve for solution vector that minimizes the residual */

327:   /* If it is < 0, no gmres steps have been performed */
328:   if (it < 0) {
329:     VecCopy(vs, vdest); /* VecCopy() is smart, exists immediately if vguess == vdest */
330:     return 0;
331:   }
333:   if (*HH(it, it) != 0.0) nrs[it] = *GRS(it) / *HH(it, it);
334:   else nrs[it] = 0.0;

336:   for (ii = 1; ii <= it; ii++) {
337:     k  = it - ii;
338:     tt = *GRS(k);
339:     for (j = k + 1; j <= it; j++) tt = tt - *HH(k, j) * nrs[j];
341:     nrs[k] = tt / *HH(k, k);
342:   }

344:   /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
345:   VecSet(VEC_TEMP, 0.0);
346:   VecMAXPY(VEC_TEMP, it + 1, nrs, &VEC_VV(0));

348:   /* Apply deflation */
349:   if (ksp->pc_side == PC_RIGHT && dgmres->r > 0) {
350:     KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
351:     VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
352:   }
353:   KSPUnwindPreconditioner(ksp, VEC_TEMP, VEC_TEMP_MATOP);

355:   /* add solution to previous solution */
356:   if (vdest != vs) VecCopy(vs, vdest);
357:   VecAXPY(vdest, 1.0, VEC_TEMP);
358:   return 0;
359: }

361: /*
362:  Do the scalar work for the orthogonalization.  Return new residual norm.
363:  */
364: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool hapend, PetscReal *res)
365: {
366:   PetscScalar *hh, *cc, *ss, tt;
367:   PetscInt     j;
368:   KSP_DGMRES  *dgmres = (KSP_DGMRES *)(ksp->data);

370:   hh = HH(0, it);
371:   cc = CC(0);
372:   ss = SS(0);

374:   /* Apply all the previously computed plane rotations to the new column
375:    of the Hessenberg matrix */
376:   for (j = 1; j <= it; j++) {
377:     tt  = *hh;
378:     *hh = PetscConj(*cc) * tt + *ss * *(hh + 1);
379:     hh++;
380:     *hh = *cc++ * *hh - (*ss++ * tt);
381:   }

383:   /*
384:    compute the new plane rotation, and apply it to:
385:    1) the right-hand-side of the Hessenberg system
386:    2) the new column of the Hessenberg matrix
387:    thus obtaining the updated value of the residual
388:    */
389:   if (!hapend) {
390:     tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh + 1)) * *(hh + 1));
391:     if (tt == 0.0) {
392:       ksp->reason = KSP_DIVERGED_NULL;
393:       return 0;
394:     }
395:     *cc          = *hh / tt;
396:     *ss          = *(hh + 1) / tt;
397:     *GRS(it + 1) = -(*ss * *GRS(it));
398:     *GRS(it)     = PetscConj(*cc) * *GRS(it);
399:     *hh          = PetscConj(*cc) * *hh + *ss * *(hh + 1);
400:     *res         = PetscAbsScalar(*GRS(it + 1));
401:   } else {
402:     /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
403:      another rotation matrix (so RH doesn't change).  The new residual is
404:      always the new sine term times the residual from last time (GRS(it)),
405:      but now the new sine rotation would be zero...so the residual should
406:      be zero...so we will multiply "zero" by the last residual.  This might
407:      not be exactly what we want to do here -could just return "zero". */

409:     *res = 0.0;
410:   }
411:   return 0;
412: }

414: /*
415:   Allocates more work vectors, starting from VEC_VV(it).
416:  */
417: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp, PetscInt it)
418: {
419:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
420:   PetscInt    nwork  = dgmres->nwork_alloc, k, nalloc;

422:   nalloc = PetscMin(ksp->max_it, dgmres->delta_allocate);
423:   /* Adjust the number to allocate to make sure that we don't exceed the
424:    number of available slots */
425:   if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
426:   if (!nalloc) return 0;

428:   dgmres->vv_allocated += nalloc;

430:   KSPCreateVecs(ksp, nalloc, &dgmres->user_work[nwork], 0, NULL);

432:   dgmres->mwork_alloc[nwork] = nalloc;
433:   for (k = 0; k < nalloc; k++) dgmres->vecs[it + VEC_OFFSET + k] = dgmres->user_work[nwork][k];
434:   dgmres->nwork_alloc++;
435:   return 0;
436: }

438: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp, Vec ptr, Vec *result)
439: {
440:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;

442:   if (!ptr) {
443:     if (!dgmres->sol_temp) { VecDuplicate(ksp->vec_sol, &dgmres->sol_temp); }
444:     ptr = dgmres->sol_temp;
445:   }
446:   if (!dgmres->nrs) {
447:     /* allocate the work area */
448:     PetscMalloc1(dgmres->max_k, &dgmres->nrs);
449:   }
450:   KSPDGMRESBuildSoln(dgmres->nrs, ksp->vec_sol, ptr, ksp, dgmres->it);
451:   if (result) *result = ptr;
452:   return 0;
453: }

455: PetscErrorCode KSPView_DGMRES(KSP ksp, PetscViewer viewer)
456: {
457:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
458:   PetscBool   iascii, isharmonic;

460:   KSPView_GMRES(ksp, viewer);
461:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
462:   if (iascii) {
463:     PetscViewerASCIIPrintf(viewer, "    Adaptive strategy is used: %s\n", PetscBools[dgmres->force]);
464:     PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic);
465:     if (isharmonic) {
466:       PetscViewerASCIIPrintf(viewer, "   Frequency of extracted eigenvalues = %" PetscInt_FMT " using Harmonic Ritz values \n", dgmres->neig);
467:     } else {
468:       PetscViewerASCIIPrintf(viewer, "   Frequency of extracted eigenvalues = %" PetscInt_FMT " using Ritz values \n", dgmres->neig);
469:     }
470:     PetscViewerASCIIPrintf(viewer, "   Total number of extracted eigenvalues = %" PetscInt_FMT "\n", dgmres->r);
471:     PetscViewerASCIIPrintf(viewer, "   Maximum number of eigenvalues set to be extracted = %" PetscInt_FMT "\n", dgmres->max_neig);
472:     PetscViewerASCIIPrintf(viewer, "   relaxation parameter for the adaptive strategy(smv)  = %g\n", (double)dgmres->smv);
473:     PetscViewerASCIIPrintf(viewer, "   Number of matvecs : %" PetscInt_FMT "\n", dgmres->matvecs);
474:   }
475:   return 0;
476: }

478: PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp, PetscInt neig)
479: {
480:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;

483:   dgmres->neig = neig;
484:   return 0;
485: }

487: static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp, PetscInt max_neig)
488: {
489:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;

492:   dgmres->max_neig = max_neig;
493:   return 0;
494: }

496: static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp, PetscReal ratio)
497: {
498:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;

501:   dgmres->smv = ratio;
502:   return 0;
503: }

505: static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp, PetscBool force)
506: {
507:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;

509:   dgmres->force = force;
510:   return 0;
511: }

513: PetscErrorCode KSPSetFromOptions_DGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject)
514: {
515:   PetscInt    neig;
516:   PetscInt    max_neig;
517:   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
518:   PetscBool   flg;

520:   KSPSetFromOptions_GMRES(ksp, PetscOptionsObject);
521:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP DGMRES Options");
522:   PetscOptionsInt("-ksp_dgmres_eigen", "Number of smallest eigenvalues to extract at each restart", "KSPDGMRESSetEigen", dgmres->neig, &neig, &flg);
523:   if (flg) KSPDGMRESSetEigen(ksp, neig);
524:   PetscOptionsInt("-ksp_dgmres_max_eigen", "Maximum Number of smallest eigenvalues to extract ", "KSPDGMRESSetMaxEigen", dgmres->max_neig, &max_neig, &flg);
525:   if (flg) KSPDGMRESSetMaxEigen(ksp, max_neig);
526:   PetscOptionsReal("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &dgmres->smv, NULL);
527:   PetscOptionsBool("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &dgmres->improve, NULL);
528:   PetscOptionsBool("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &dgmres->force, NULL);
529:   PetscOptionsHeadEnd();
530:   return 0;
531: }

533: PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
534: {
535:   KSP_DGMRES  *dgmres = (KSP_DGMRES *)ksp->data;
536:   PetscInt     i, j, k;
537:   PetscBLASInt nr, bmax;
538:   PetscInt     r = dgmres->r;
539:   PetscInt     neig;                                 /* number of eigenvalues to extract at each restart */
540:   PetscInt     neig1    = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
541:   PetscInt     max_neig = dgmres->max_neig;          /* Max number of eigenvalues to extract during the iterative process */
542:   PetscInt     N        = dgmres->max_k + 1;
543:   PetscInt     n        = dgmres->it + 1;
544:   PetscReal    alpha;

546:   PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0);
547:   if (dgmres->neig == 0 || (max_neig < (r + neig1) && !dgmres->improve)) {
548:     PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0);
549:     return 0;
550:   }

552:   KSPDGMRESComputeSchurForm(ksp, &neig);
553:   /* Form the extended Schur vectors X=VV*Sr */
554:   if (!XX) VecDuplicateVecs(VEC_VV(0), neig1, &XX);
555:   for (j = 0; j < neig; j++) {
556:     VecZeroEntries(XX[j]);
557:     VecMAXPY(XX[j], n, &SR[j * N], &VEC_VV(0));
558:   }

560:   /* Orthogonalize X against U */
561:   if (!ORTH) PetscMalloc1(max_neig, &ORTH);
562:   if (r > 0) {
563:     /* modified Gram-Schmidt */
564:     for (j = 0; j < neig; j++) {
565:       for (i = 0; i < r; i++) {
566:         /* First, compute U'*X[j] */
567:         VecDot(XX[j], UU[i], &alpha);
568:         /* Then, compute X(j)=X(j)-U*U'*X(j) */
569:         VecAXPY(XX[j], -alpha, UU[i]);
570:       }
571:     }
572:   }
573:   /* Compute MX = M^{-1}*A*X */
574:   if (!MX) VecDuplicateVecs(VEC_VV(0), neig1, &MX);
575:   for (j = 0; j < neig; j++) KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
576:   dgmres->matvecs += neig;

578:   if ((r + neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
579:     KSPDGMRESImproveEig(ksp, neig);
580:     PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0);
581:     return 0; /* We return here since data for M have been improved in  KSPDGMRESImproveEig()*/
582:   }

584:   /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
585:   if (!XMX) PetscMalloc1(neig1 * neig1, &XMX);
586:   for (j = 0; j < neig; j++) VecMDot(MX[j], neig, XX, &(XMX[j * neig1]));

588:   if (r > 0) {
589:     /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
590:     if (!UMX) PetscMalloc1(max_neig * neig1, &UMX);
591:     for (j = 0; j < neig; j++) VecMDot(MX[j], r, UU, &(UMX[j * max_neig]));
592:     /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
593:     if (!XMU) PetscMalloc1(max_neig * neig1, &XMU);
594:     for (j = 0; j < r; j++) VecMDot(MU[j], neig, XX, &(XMU[j * neig1]));
595:   }

597:   /* Form the new matrix T = [T UMX; XMU XMX]; */
598:   if (!TT) PetscMalloc1(max_neig * max_neig, &TT);
599:   if (r > 0) {
600:     /* Add XMU to T */
601:     for (j = 0; j < r; j++) PetscArraycpy(&(TT[max_neig * j + r]), &(XMU[neig1 * j]), neig);
602:     /* Add [UMX; XMX] to T */
603:     for (j = 0; j < neig; j++) {
604:       k = r + j;
605:       PetscArraycpy(&(TT[max_neig * k]), &(UMX[max_neig * j]), r);
606:       PetscArraycpy(&(TT[max_neig * k + r]), &(XMX[neig1 * j]), neig);
607:     }
608:   } else { /* Add XMX to T */
609:     for (j = 0; j < neig; j++) PetscArraycpy(&(TT[max_neig * j]), &(XMX[neig1 * j]), neig);
610:   }

612:   dgmres->r += neig;
613:   r = dgmres->r;
614:   PetscBLASIntCast(r, &nr);
615:   /*LU Factorize T with Lapack xgetrf routine */

617:   PetscBLASIntCast(max_neig, &bmax);
618:   if (!TTF) PetscMalloc1(bmax * bmax, &TTF);
619:   PetscArraycpy(TTF, TT, bmax * r);
620:   if (!INVP) PetscMalloc1(bmax, &INVP);
621:   {
622:     PetscBLASInt info;
623:     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
625:   }

627:   /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
628:   if (!UU) {
629:     VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
630:     VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
631:   }
632:   for (j = 0; j < neig; j++) {
633:     VecCopy(XX[j], UU[r - neig + j]);
634:     VecCopy(MX[j], MU[r - neig + j]);
635:   }
636:   PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0);
637:   return 0;
638: }

640: PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
641: {
642:   KSP_DGMRES   *dgmres = (KSP_DGMRES *)ksp->data;
643:   PetscInt      N = dgmres->max_k + 1, n = dgmres->it + 1;
644:   PetscBLASInt  bn;
645:   PetscReal    *A;
646:   PetscBLASInt  ihi;
647:   PetscBLASInt  ldA = 0; /* leading dimension of A */
648:   PetscBLASInt  ldQ;     /* leading dimension of Q */
649:   PetscReal    *Q;       /*  orthogonal matrix of  (left) Schur vectors */
650:   PetscReal    *work;    /* working vector */
651:   PetscBLASInt  lwork;   /* size of the working vector */
652:   PetscInt     *perm;    /* Permutation vector to sort eigenvalues */
653:   PetscInt      i, j;
654:   PetscBLASInt  NbrEig;          /* Number of eigenvalues really extracted */
655:   PetscReal    *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A */
656:   PetscBLASInt *select;
657:   PetscBLASInt *iwork;
658:   PetscBLASInt  liwork;
659:   PetscScalar  *Ht;   /* Transpose of the Hessenberg matrix */
660:   PetscScalar  *t;    /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
661:   PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
662:   PetscBool     flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */

664:   PetscBLASIntCast(n, &bn);
665:   PetscBLASIntCast(N, &ldA);
666:   ihi = ldQ = bn;
667:   PetscBLASIntCast(5 * N, &lwork);

669: #if defined(PETSC_USE_COMPLEX)
670:   SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "No support for complex numbers.");
671: #endif

673:   PetscMalloc1(ldA * ldA, &A);
674:   PetscMalloc1(ldQ * n, &Q);
675:   PetscMalloc1(lwork, &work);
676:   if (!dgmres->wr) {
677:     PetscMalloc1(n, &dgmres->wr);
678:     PetscMalloc1(n, &dgmres->wi);
679:   }
680:   wr = dgmres->wr;
681:   wi = dgmres->wi;
682:   PetscMalloc1(n, &modul);
683:   PetscMalloc1(n, &perm);
684:   /* copy the Hessenberg matrix to work space */
685:   PetscArraycpy(A, dgmres->hes_origin, ldA * ldA);
686:   PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag);
687:   if (flag) {
688:     /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
689:     /* Transpose the Hessenberg matrix */
690:     PetscMalloc1(bn * bn, &Ht);
691:     for (i = 0; i < bn; i++) {
692:       for (j = 0; j < bn; j++) Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
693:     }

695:     /* Solve the system H^T*t = h_{m+1,m}e_m */
696:     PetscCalloc1(bn, &t);
697:     t[bn - 1] = dgmres->hes_origin[(bn - 1) * ldA + bn]; /* Pick the last element H(m+1,m) */
698:     PetscMalloc1(bn, &ipiv);
699:     /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
700:     {
701:       PetscBLASInt info;
702:       PetscBLASInt nrhs = 1;
703:       PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
705:     }
706:     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
707:     for (i = 0; i < bn; i++) A[(bn - 1) * bn + i] += t[i];
708:     PetscFree(t);
709:     PetscFree(Ht);
710:   }
711:   /* Compute eigenvalues with the Schur form */
712:   {
713:     PetscBLASInt info = 0;
714:     PetscBLASInt ilo  = 1;
715:     PetscCallBLAS("LAPACKhseqr", LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
717:   }
718:   PetscFree(work);

720:   /* sort the eigenvalues */
721:   for (i = 0; i < n; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
722:   for (i = 0; i < n; i++) perm[i] = i;

724:   PetscSortRealWithPermutation(n, modul, perm);
725:   /* save the complex modulus of the largest eigenvalue in magnitude */
726:   if (dgmres->lambdaN < modul[perm[n - 1]]) dgmres->lambdaN = modul[perm[n - 1]];
727:   /* count the number of extracted eigenvalues (with complex conjugates) */
728:   NbrEig = 0;
729:   while (NbrEig < dgmres->neig) {
730:     if (wi[perm[NbrEig]] != 0) NbrEig += 2;
731:     else NbrEig += 1;
732:   }
733:   /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */

735:   PetscCalloc1(n, &select);

737:   if (!dgmres->GreatestEig) {
738:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
739:   } else {
740:     for (j = 0; j < NbrEig; j++) select[perm[n - j - 1]] = 1;
741:   }
742:   /* call Lapack dtrsen */
743:   lwork  = PetscMax(1, 4 * NbrEig * (bn - NbrEig));
744:   liwork = PetscMax(1, 2 * NbrEig * (bn - NbrEig));
745:   PetscMalloc1(lwork, &work);
746:   PetscMalloc1(liwork, &iwork);
747:   {
748:     PetscBLASInt info = 0;
749:     PetscReal    CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
750:     PetscReal    CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
751:     PetscCallBLAS("LAPACKtrsen", LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
753:   }
754:   PetscFree(select);

756:   /* Extract the Schur vectors */
757:   for (j = 0; j < NbrEig; j++) PetscArraycpy(&SR[j * N], &(Q[j * ldQ]), n);
758:   *neig = NbrEig;
759:   PetscFree(A);
760:   PetscFree(work);
761:   PetscFree(perm);
762:   PetscFree(work);
763:   PetscFree(iwork);
764:   PetscFree(modul);
765:   PetscFree(Q);
766:   return 0;
767: }

769: PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
770: {
771:   KSP_DGMRES  *dgmres = (KSP_DGMRES *)ksp->data;
772:   PetscInt     i, r = dgmres->r;
773:   PetscReal    alpha    = 1.0;
774:   PetscInt     max_neig = dgmres->max_neig;
775:   PetscBLASInt br, bmax;
776:   PetscReal    lambda = dgmres->lambdaN;

778:   PetscBLASIntCast(r, &br);
779:   PetscBLASIntCast(max_neig, &bmax);
780:   PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
781:   if (!r) {
782:     VecCopy(x, y);
783:     return 0;
784:   }
785:   /* Compute U'*x */
786:   if (!X1) {
787:     PetscMalloc1(bmax, &X1);
788:     PetscMalloc1(bmax, &X2);
789:   }
790:   VecMDot(x, r, UU, X1);

792:   /* Solve T*X1=X2 for X1*/
793:   PetscArraycpy(X2, X1, br);
794:   {
795:     PetscBLASInt info;
796:     PetscBLASInt nrhs = 1;
797:     PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
799:   }
800:   /* Iterative refinement -- is it really necessary ?? */
801:   if (!WORK) {
802:     PetscMalloc1(3 * bmax, &WORK);
803:     PetscMalloc1(bmax, &IWORK);
804:   }
805:   {
806:     PetscBLASInt info;
807:     PetscReal    berr, ferr;
808:     PetscBLASInt nrhs = 1;
809:     PetscCallBLAS("LAPACKgerfs", LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax, X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
811:   }

813:   for (i = 0; i < r; i++) X2[i] = X1[i] / lambda - X2[i];

815:   /* Compute X2=U*X2 */
816:   VecZeroEntries(y);
817:   VecMAXPY(y, r, X2, UU);
818:   VecAXPY(y, alpha, x);

820:   PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
821:   return 0;
822: }

824: static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
825: {
826:   KSP_DGMRES   *dgmres = (KSP_DGMRES *)ksp->data;
827:   PetscInt      j, r_old, r = dgmres->r;
828:   PetscBLASInt  i     = 0;
829:   PetscInt      neig1 = dgmres->neig + EIG_OFFSET;
830:   PetscInt      bmax  = dgmres->max_neig;
831:   PetscInt      aug   = r + neig;       /* actual size of the augmented invariant basis */
832:   PetscInt      aug1  = bmax + neig1;   /* maximum size of the augmented invariant basis */
833:   PetscBLASInt  ldA;                    /* leading dimension of AUAU and AUU*/
834:   PetscBLASInt  N;                      /* size of AUAU */
835:   PetscReal    *Q;                      /*  orthogonal matrix of  (left) schur vectors */
836:   PetscReal    *Z;                      /*  orthogonal matrix of  (right) schur vectors */
837:   PetscReal    *work;                   /* working vector */
838:   PetscBLASInt  lwork;                  /* size of the working vector */
839:   PetscInt     *perm;                   /* Permutation vector to sort eigenvalues */
840:   PetscReal    *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
841:   PetscBLASInt  NbrEig = 0, nr, bm;
842:   PetscBLASInt *select;
843:   PetscBLASInt  liwork, *iwork;

845:   /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
846:   if (!AUU) {
847:     PetscMalloc1(aug1 * aug1, &AUU);
848:     PetscMalloc1(aug1 * aug1, &AUAU);
849:   }
850:   /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
851:    * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
852:   /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
853:   for (j = 0; j < r; j++) VecMDot(UU[j], r, MU, &AUU[j * aug1]);
854:   /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
855:   for (j = 0; j < neig; j++) VecMDot(XX[j], r, MU, &AUU[(r + j) * aug1]);
856:   /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
857:   for (j = 0; j < r; j++) VecMDot(UU[j], neig, MX, &AUU[j * aug1 + r]);
858:   /* (MX)'*X size (neig neig) --  store in AUU from the column <r> and the row <r>*/
859:   for (j = 0; j < neig; j++) VecMDot(XX[j], neig, MX, &AUU[(r + j) * aug1 + r]);

861:   /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
862:   /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
863:   for (j = 0; j < r; j++) VecMDot(MU[j], r, MU, &AUAU[j * aug1]);
864:   /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
865:   for (j = 0; j < neig; j++) VecMDot(MX[j], r, MU, &AUAU[(r + j) * aug1]);
866:   /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
867:   for (j = 0; j < r; j++) VecMDot(MU[j], neig, MX, &AUAU[j * aug1 + r]);
868:   /* (MX)'*MX size (neig neig) --  store in AUAU from the column <r> and the row <r>*/
869:   for (j = 0; j < neig; j++) VecMDot(MX[j], neig, MX, &AUAU[(r + j) * aug1 + r]);

871:   /* Computation of the eigenvectors */
872:   PetscBLASIntCast(aug1, &ldA);
873:   PetscBLASIntCast(aug, &N);
874:   lwork = 8 * N + 20; /* sizeof the working space */
875:   PetscMalloc1(N, &wr);
876:   PetscMalloc1(N, &wi);
877:   PetscMalloc1(N, &beta);
878:   PetscMalloc1(N, &modul);
879:   PetscMalloc1(N, &perm);
880:   PetscMalloc1(N * N, &Q);
881:   PetscMalloc1(N * N, &Z);
882:   PetscMalloc1(lwork, &work);
883:   {
884:     PetscBLASInt info = 0;
885:     PetscCallBLAS("LAPACKgges", LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
887:   }
888:   for (i = 0; i < N; i++) {
889:     if (beta[i] != 0.0) {
890:       wr[i] /= beta[i];
891:       wi[i] /= beta[i];
892:     }
893:   }
894:   /* sort the eigenvalues */
895:   for (i = 0; i < N; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
896:   for (i = 0; i < N; i++) perm[i] = i;
897:   PetscSortRealWithPermutation(N, modul, perm);
898:   /* Save the norm of the largest eigenvalue */
899:   if (dgmres->lambdaN < modul[perm[N - 1]]) dgmres->lambdaN = modul[perm[N - 1]];
900:   /* Allocate space to extract the first r schur vectors   */
901:   if (!SR2) PetscMalloc1(aug1 * bmax, &SR2);
902:   /* count the number of extracted eigenvalues (complex conjugates count as 2) */
903:   while (NbrEig < bmax) {
904:     if (wi[perm[NbrEig]] == 0) NbrEig += 1;
905:     else NbrEig += 2;
906:   }
907:   if (NbrEig > bmax) NbrEig = bmax - 1;
908:   r_old     = r; /* previous size of r */
909:   dgmres->r = r = NbrEig;

911:   /* Select the eigenvalues to reorder */
912:   PetscCalloc1(N, &select);
913:   if (!dgmres->GreatestEig) {
914:     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
915:   } else {
916:     for (j = 0; j < NbrEig; j++) select[perm[N - j - 1]] = 1;
917:   }
918:   /* Reorder and extract the new <r> schur vectors */
919:   lwork  = PetscMax(4 * N + 16, 2 * NbrEig * (N - NbrEig));
920:   liwork = PetscMax(N + 6, 2 * NbrEig * (N - NbrEig));
921:   PetscFree(work);
922:   PetscMalloc1(lwork, &work);
923:   PetscMalloc1(liwork, &iwork);
924:   {
925:     PetscBLASInt info = 0;
926:     PetscReal    Dif[2];
927:     PetscBLASInt ijob  = 2;
928:     PetscBLASInt wantQ = 1, wantZ = 1;
929:     PetscCallBLAS("LAPACKtgsen", LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
931:   }
932:   PetscFree(select);

934:   for (j = 0; j < r; j++) PetscArraycpy(&SR2[j * aug1], &(Z[j * N]), N);

936:   /* Multiply the Schur vectors SR2 by U (and X)  to get a new U
937:    -- save it temporarily in MU */
938:   for (j = 0; j < r; j++) {
939:     VecZeroEntries(MU[j]);
940:     VecMAXPY(MU[j], r_old, &SR2[j * aug1], UU);
941:     VecMAXPY(MU[j], neig, &SR2[j * aug1 + r_old], XX);
942:   }
943:   /* Form T = U'*MU*U */
944:   for (j = 0; j < r; j++) {
945:     VecCopy(MU[j], UU[j]);
946:     KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
947:   }
948:   dgmres->matvecs += r;
949:   for (j = 0; j < r; j++) VecMDot(MU[j], r, UU, &TT[j * bmax]);
950:   /* Factorize T */
951:   PetscArraycpy(TTF, TT, bmax * r);
952:   PetscBLASIntCast(r, &nr);
953:   PetscBLASIntCast(bmax, &bm);
954:   {
955:     PetscBLASInt info;
956:     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
958:   }
959:   /* Free Memory */
960:   PetscFree(wr);
961:   PetscFree(wi);
962:   PetscFree(beta);
963:   PetscFree(modul);
964:   PetscFree(perm);
965:   PetscFree(Q);
966:   PetscFree(Z);
967:   PetscFree(work);
968:   PetscFree(iwork);
969:   return 0;
970: }

972: /*MC
973:      KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
974:                  In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
975:                  stagnation occurs.

977:    Options Database Keys:
978:    GMRES Options (inherited):
979: +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
980: .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
981: .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
982:                              vectors are allocated as needed)
983: .   -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
984: .   -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
985: .   -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
986:                                    stability of the classical Gram-Schmidt  orthogonalization.
987: -   -ksp_gmres_krylov_monitor - plot the Krylov space generated

989:    DGMRES Options Database Keys:
990: +   -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
991: .   -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
992:                                        process
993: .   -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
994: -   -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
995:                                                    parsed by `PetscOptionsGetViewer()`.  If neig > 1, viewerspec should
996:                                                    end with ":append".  No vectors will be viewed if the adaptive
997:                                                    strategy chooses not to deflate, so -ksp_dgmres_force should also
998:                                                    be given.
999:                                                    The deflation vectors span a subspace that may be a good
1000:                                                    approximation of the subspace of smallest eigenvectors of the
1001:                                                    preconditioned operator, so this option can aid in understanding
1002:                                                    the performance of a preconditioner.

1004:  Level: beginner

1006:  Note:
1007:     Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not supported

1009:  References:
1010: + [1] - J. Erhel, K. Burrage and B. Pohl,  Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996).
1011: - [2] - D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids,
1012:    In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023

1014:  Contributed by:
1015:   Desire NUENTSA WAKAM, INRIA

1017:  .seealso: [](chapter_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPFGMRES`, `KSPLGMRES`,
1018:            `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESSetOrthogonalization()`, `KSPGMRESGetOrthogonalization()`,
1019:            `KSPGMRESClassicalGramSchmidtOrthogonalization()`, `KSPGMRESModifiedGramSchmidtOrthogonalization()`,
1020:            `KSPGMRESCGSRefinementType`, `KSPGMRESSetCGSRefinementType()`, `KSPGMRESGetCGSRefinementType()`, `KSPGMRESMonitorKrylov()`, `KSPSetPCSide()`
1021:  M*/

1023: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1024: {
1025:   KSP_DGMRES *dgmres;

1027:   PetscNew(&dgmres);
1028:   ksp->data = (void *)dgmres;

1030:   KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3);
1031:   KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2);
1032:   KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1);

1034:   ksp->ops->buildsolution                = KSPBuildSolution_DGMRES;
1035:   ksp->ops->setup                        = KSPSetUp_DGMRES;
1036:   ksp->ops->solve                        = KSPSolve_DGMRES;
1037:   ksp->ops->destroy                      = KSPDestroy_DGMRES;
1038:   ksp->ops->view                         = KSPView_DGMRES;
1039:   ksp->ops->setfromoptions               = KSPSetFromOptions_DGMRES;
1040:   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1041:   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;

1043:   PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES);
1044:   PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetOrthogonalization_C", KSPGMRESSetOrthogonalization_GMRES);
1045:   PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES);
1046:   PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetHapTol_C", KSPGMRESSetHapTol_GMRES);
1047:   PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetCGSRefinementType_C", KSPGMRESSetCGSRefinementType_GMRES);
1048:   /* -- New functions defined in DGMRES -- */
1049:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", KSPDGMRESSetEigen_DGMRES);
1050:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", KSPDGMRESSetMaxEigen_DGMRES);
1051:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", KSPDGMRESSetRatio_DGMRES);
1052:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", KSPDGMRESForce_DGMRES);
1053:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", KSPDGMRESComputeSchurForm_DGMRES);
1054:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", KSPDGMRESComputeDeflationData_DGMRES);
1055:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", KSPDGMRESApplyDeflation_DGMRES);
1056:   PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);

1058:   PetscLogEventRegister("DGMRESCompDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1059:   PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);

1061:   dgmres->haptol         = 1.0e-30;
1062:   dgmres->q_preallocate  = 0;
1063:   dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1064:   dgmres->orthog         = KSPGMRESClassicalGramSchmidtOrthogonalization;
1065:   dgmres->nrs            = NULL;
1066:   dgmres->sol_temp       = NULL;
1067:   dgmres->max_k          = GMRES_DEFAULT_MAXK;
1068:   dgmres->Rsvd           = NULL;
1069:   dgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
1070:   dgmres->orthogwork     = NULL;

1072:   /* Default values for the deflation */
1073:   dgmres->r           = 0;
1074:   dgmres->neig        = DGMRES_DEFAULT_EIG;
1075:   dgmres->max_neig    = DGMRES_DEFAULT_MAXEIG - 1;
1076:   dgmres->lambdaN     = 0.0;
1077:   dgmres->smv         = SMV;
1078:   dgmres->matvecs     = 0;
1079:   dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1080:   dgmres->HasSchur    = PETSC_FALSE;
1081:   return 0;
1082: }