Actual source code: agmresdeflation.c
1: /*
2: * This file computes data for the deflated restarting in the Newton-basis GMRES. At each restart (or at each detected stagnation in the adaptive strategy), a basis of an (approximated)invariant subspace corresponding to the smallest eigenvalues is extracted from the Krylov subspace. It is then used to augment the Newton basis.
3: *
4: * References : D. Nuentsa Wakam and J. Erhel, Parallelism and robustness in GMRES with the Newton basis and the deflation of eigenvalues. Research report INRIA RR-7787.
5: * Author: Desire NUENTSA WAKAM <desire.nuentsa_wakam@inria.fr>, 2011
6: */
8: #include <../src/ksp/ksp/impls/gmres/agmres/agmresimpl.h>
10: /* Quicksort algorithm to sort the eigenvalues in increasing orders
11: * val_r - real part of eigenvalues, unchanged on exit.
12: * val_i - Imaginary part of eigenvalues unchanged on exit.
13: * size - Number of eigenvalues (with complex conjugates)
14: * perm - contains on exit the permutation vector to reorder the vectors val_r and val_i.
15: */
16: #define DEPTH 500
17: static PetscErrorCode KSPAGMRESQuickSort(PetscScalar *val_r, PetscScalar *val_i, PetscInt size, PetscInt *perm)
18: {
19: PetscInt deb[DEPTH], fin[DEPTH];
20: PetscInt ipivot;
21: PetscScalar pivot_r, pivot_i;
22: PetscInt i, L, R, j;
23: PetscScalar abs_pivot;
24: PetscScalar abs_val;
26: /* initialize perm vector */
27: for (j = 0; j < size; j++) perm[j] = j;
29: deb[0] = 0;
30: fin[0] = size;
31: i = 0;
32: while (i >= 0) {
33: L = deb[i];
34: R = fin[i] - 1;
35: if (L < R) {
36: pivot_r = val_r[L];
37: pivot_i = val_i[L];
38: abs_pivot = PetscSqrtReal(pivot_r * pivot_r + pivot_i * pivot_i);
39: ipivot = perm[L];
41: while (L < R) {
42: abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
43: while (abs_val >= abs_pivot && L < R) {
44: R--;
45: abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
46: }
47: if (L < R) {
48: val_r[L] = val_r[R];
49: val_i[L] = val_i[R];
50: perm[L] = perm[R];
51: L += 1;
52: }
53: abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
54: while (abs_val <= abs_pivot && L < R) {
55: L++;
56: abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
57: }
58: if (L < R) {
59: val_r[R] = val_r[L];
60: val_i[R] = val_i[L];
61: perm[R] = perm[L];
62: R -= 1;
63: }
64: }
65: val_r[L] = pivot_r;
66: val_i[L] = pivot_i;
67: perm[L] = ipivot;
68: deb[i + 1] = L + 1;
69: fin[i + 1] = fin[i];
70: fin[i] = L;
71: i += 1;
73: } else i--;
74: }
75: return 0;
76: }
78: /*
79: * Compute the Schur vectors from the generalized eigenvalue problem A.u =\lambda.B.u
80: * KspSize - rank of the matrices A and B, size of the current Krylov basis
81: * A - Left matrix
82: * B - Right matrix
83: * ldA - first dimension of A as declared in the calling program
84: * ldB - first dimension of B as declared in the calling program
85: * IsReduced - specifies if the matrices are already in the reduced form,
86: * i.e A is a Hessenberg matrix and B is upper triangular.
87: * Sr - on exit, the extracted Schur vectors corresponding
88: * the smallest eigenvalues (with complex conjugates)
89: * CurNeig - Number of extracted eigenvalues
90: */
91: static PetscErrorCode KSPAGMRESSchurForm(KSP ksp, PetscBLASInt KspSize, PetscScalar *A, PetscBLASInt ldA, PetscScalar *B, PetscBLASInt ldB, PetscBool IsReduced, PetscScalar *Sr, PetscInt *CurNeig)
92: {
93: KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;
94: PetscInt max_k = agmres->max_k;
95: PetscBLASInt r;
96: PetscInt neig = agmres->neig;
97: PetscScalar *wr = agmres->wr;
98: PetscScalar *wi = agmres->wi;
99: PetscScalar *beta = agmres->beta;
100: PetscScalar *Q = agmres->Q;
101: PetscScalar *Z = agmres->Z;
102: PetscScalar *work = agmres->work;
103: PetscBLASInt *select = agmres->select;
104: PetscInt *perm = agmres->perm;
105: PetscBLASInt sdim = 0;
106: PetscInt i, j;
107: PetscBLASInt info;
108: PetscBLASInt *iwork = agmres->iwork;
109: PetscBLASInt N = MAXKSPSIZE;
110: PetscBLASInt lwork, liwork;
111: PetscBLASInt ilo, ihi;
112: PetscBLASInt ijob, wantQ, wantZ;
113: PetscScalar Dif[2];
115: ijob = 2;
116: wantQ = 1;
117: wantZ = 1;
118: PetscBLASIntCast(PetscMax(8 * N + 16, 4 * neig * (N - neig)), &lwork);
119: PetscBLASIntCast(2 * N * neig, &liwork);
120: ilo = 1;
121: PetscBLASIntCast(KspSize, &ihi);
123: /* Compute the Schur form */
124: if (IsReduced) { /* The eigenvalue problem is already in reduced form, meaning that A is upper Hessenberg and B is triangular */
125: PetscCallBLAS("LAPACKhgeqz", LAPACKhgeqz_("S", "I", "I", &KspSize, &ilo, &ihi, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, work, &lwork, &info));
127: } else {
128: PetscCallBLAS("LAPACKgges", LAPACKgges_("V", "V", "N", NULL, &KspSize, A, &ldA, B, &ldB, &sdim, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
130: }
132: /* We should avoid computing these ratio... */
133: for (i = 0; i < KspSize; i++) {
134: if (beta[i] != 0.0) {
135: wr[i] /= beta[i];
136: wi[i] /= beta[i];
137: }
138: }
140: /* Sort the eigenvalues to extract the smallest ones */
141: KSPAGMRESQuickSort(wr, wi, KspSize, perm);
143: /* Count the number of extracted eigenvalues (with complex conjugates) */
144: r = 0;
145: while (r < neig) {
146: if (wi[r] != 0) r += 2;
147: else r += 1;
148: }
149: /* Reorder the Schur decomposition so that the cluster of smallest/largest eigenvalues appears in the leading diagonal blocks of A (and B)*/
150: PetscArrayzero(select, N);
151: if (!agmres->GreatestEig) {
152: for (j = 0; j < r; j++) select[perm[j]] = 1;
153: } else {
154: for (j = 0; j < r; j++) select[perm[KspSize - j - 1]] = 1;
155: }
156: PetscCallBLAS("LAPACKtgsen", LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &KspSize, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, &r, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
158: /* Extract the Schur vectors associated to the r smallest eigenvalues */
159: PetscArrayzero(Sr, (N + 1) * r);
160: for (j = 0; j < r; j++) {
161: for (i = 0; i < KspSize; i++) Sr[j * (N + 1) + i] = Z[j * N + i];
162: }
164: /* Broadcast Sr to all other processes to have consistent data;
165: * FIXME should investigate how to get unique Schur vectors (unique QR factorization, probably the sign of rotations) */
166: MPI_Bcast(Sr, (N + 1) * r, MPIU_SCALAR, agmres->First, PetscObjectComm((PetscObject)ksp));
167: /* Update the Shift values for the Newton basis. This is surely necessary when applying the DeflationPrecond */
168: if (agmres->DeflPrecond) KSPAGMRESLejaOrdering(wr, wi, agmres->Rshift, agmres->Ishift, max_k);
169: *CurNeig = r; /* Number of extracted eigenvalues */
170: return 0;
171: }
173: /*
174: * This function form the matrices for the generalized eigenvalue problem,
175: * it then compute the Schur vectors needed to augment the Newton basis.
176: */
177: PetscErrorCode KSPAGMRESComputeDeflationData(KSP ksp)
178: {
179: KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;
180: Vec *U = agmres->U;
181: Vec *TmpU = agmres->TmpU;
182: PetscScalar *MatEigL = agmres->MatEigL;
183: PetscScalar *MatEigR = agmres->MatEigR;
184: PetscScalar *Sr = agmres->Sr;
185: PetscScalar alpha, beta;
186: PetscInt i, j;
187: PetscInt max_k = agmres->max_k; /* size of the non - augmented subspace */
188: PetscInt CurNeig; /* Current number of extracted eigenvalues */
189: PetscInt N = MAXKSPSIZE;
190: PetscBLASInt bN;
191: PetscInt lC = N + 1;
192: PetscInt KspSize = KSPSIZE;
193: PetscBLASInt blC, bKspSize;
194: PetscInt PrevNeig = agmres->r;
196: PetscLogEventBegin(KSP_AGMRESComputeDeflationData, ksp, 0, 0, 0);
197: if (agmres->neig <= 1) return 0;
198: /* Explicitly form MatEigL = H^T*H, It can also be formed as H^T+h_{N+1,N}H^-1e^T */
199: alpha = 1.0;
200: beta = 0.0;
201: PetscBLASIntCast(KspSize, &bKspSize);
202: PetscBLASIntCast(lC, &blC);
203: PetscBLASIntCast(N, &bN);
204: PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &bKspSize, &bKspSize, &blC, &alpha, agmres->hes_origin, &blC, agmres->hes_origin, &blC, &beta, MatEigL, &bN));
205: if (!agmres->ritz) {
206: /* Form TmpU = V*H where V is the Newton basis orthogonalized with roddec*/
207: for (j = 0; j < KspSize; j++) {
208: /* Apply the elementary reflectors (stored in Qloc) on H */
209: KSPAGMRESRodvec(ksp, KspSize + 1, &agmres->hes_origin[j * lC], TmpU[j]);
210: }
211: /* Now form MatEigR = TmpU^T*W where W is [VEC_V(1:max_k); U] */
212: for (j = 0; j < max_k; j++) VecMDot(VEC_V(j), KspSize, TmpU, &MatEigR[j * N]);
213: for (j = max_k; j < KspSize; j++) VecMDot(U[j - max_k], KspSize, TmpU, &MatEigR[j * N]);
214: } else { /* Form H^T */
215: for (j = 0; j < N; j++) {
216: for (i = 0; i < N; i++) MatEigR[j * N + i] = agmres->hes_origin[i * lC + j];
217: }
218: }
219: /* Obtain the Schur form of the generalized eigenvalue problem MatEigL*y = \lambda*MatEigR*y */
220: if (agmres->DeflPrecond) {
221: KSPAGMRESSchurForm(ksp, KspSize, agmres->hes_origin, lC, agmres->Rloc, lC, PETSC_TRUE, Sr, &CurNeig);
222: } else {
223: KSPAGMRESSchurForm(ksp, KspSize, MatEigL, N, MatEigR, N, PETSC_FALSE, Sr, &CurNeig);
224: }
226: if (agmres->DeflPrecond) { /* Switch to DGMRES to improve the basis of the invariant subspace associated to the deflation */
227: agmres->HasSchur = PETSC_TRUE;
228: KSPDGMRESComputeDeflationData(ksp, &CurNeig);
229: return 0;
230: }
231: /* Form the Schur vectors in the entire subspace: U = W * Sr where W = [VEC_V(1:max_k); U]*/
232: for (j = 0; j < PrevNeig; j++) { /* First, copy U to a temporary place */
233: VecCopy(U[j], TmpU[j]);
234: }
236: for (j = 0; j < CurNeig; j++) {
237: VecZeroEntries(U[j]);
238: VecMAXPY(U[j], max_k, &Sr[j * (N + 1)], &VEC_V(0));
239: VecMAXPY(U[j], PrevNeig, &Sr[j * (N + 1) + max_k], TmpU);
240: }
241: agmres->r = CurNeig;
242: PetscLogEventEnd(KSP_AGMRESComputeDeflationData, ksp, 0, 0, 0);
243: return 0;
244: }