Actual source code: agmresdeflation.c

  1: /*
  2:  *  This file computes data for the deflated restarting in the Newton-basis GMRES. At each restart (or at each detected stagnation in the adaptive strategy), a basis of an (approximated)invariant subspace corresponding to the smallest eigenvalues is extracted from the Krylov subspace. It is then used to augment the Newton basis.
  3:  *
  4:  * References : D. Nuentsa Wakam and J. Erhel, Parallelism and robustness in GMRES with the Newton basis and the deflation of eigenvalues. Research report INRIA RR-7787.
  5:  * Author: Desire NUENTSA WAKAM <desire.nuentsa_wakam@inria.fr>, 2011
  6:  */

  8: #include <../src/ksp/ksp/impls/gmres/agmres/agmresimpl.h>

 10: /* Quicksort algorithm to sort  the eigenvalues in increasing orders
 11:  * val_r - real part of eigenvalues, unchanged on exit.
 12:  * val_i - Imaginary part of eigenvalues unchanged on exit.
 13:  * size - Number of eigenvalues (with complex conjugates)
 14:  * perm - contains on exit the permutation vector to reorder the vectors val_r and val_i.
 15:  */
 16: #define DEPTH 500
 17: static PetscErrorCode KSPAGMRESQuickSort(PetscScalar *val_r, PetscScalar *val_i, PetscInt size, PetscInt *perm)
 18: {
 19:   PetscInt    deb[DEPTH], fin[DEPTH];
 20:   PetscInt    ipivot;
 21:   PetscScalar pivot_r, pivot_i;
 22:   PetscInt    i, L, R, j;
 23:   PetscScalar abs_pivot;
 24:   PetscScalar abs_val;

 26:   /* initialize perm vector */
 27:   for (j = 0; j < size; j++) perm[j] = j;

 29:   deb[0] = 0;
 30:   fin[0] = size;
 31:   i      = 0;
 32:   while (i >= 0) {
 33:     L = deb[i];
 34:     R = fin[i] - 1;
 35:     if (L < R) {
 36:       pivot_r   = val_r[L];
 37:       pivot_i   = val_i[L];
 38:       abs_pivot = PetscSqrtReal(pivot_r * pivot_r + pivot_i * pivot_i);
 39:       ipivot    = perm[L];
 41:       while (L < R) {
 42:         abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
 43:         while (abs_val >= abs_pivot && L < R) {
 44:           R--;
 45:           abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
 46:         }
 47:         if (L < R) {
 48:           val_r[L] = val_r[R];
 49:           val_i[L] = val_i[R];
 50:           perm[L]  = perm[R];
 51:           L += 1;
 52:         }
 53:         abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
 54:         while (abs_val <= abs_pivot && L < R) {
 55:           L++;
 56:           abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
 57:         }
 58:         if (L < R) {
 59:           val_r[R] = val_r[L];
 60:           val_i[R] = val_i[L];
 61:           perm[R]  = perm[L];
 62:           R -= 1;
 63:         }
 64:       }
 65:       val_r[L]   = pivot_r;
 66:       val_i[L]   = pivot_i;
 67:       perm[L]    = ipivot;
 68:       deb[i + 1] = L + 1;
 69:       fin[i + 1] = fin[i];
 70:       fin[i]     = L;
 71:       i += 1;
 73:     } else i--;
 74:   }
 75:   return 0;
 76: }

 78: /*
 79:  * Compute the Schur vectors from the generalized eigenvalue problem A.u =\lambda.B.u
 80:  * KspSize -  rank of the matrices A and B, size of the current Krylov basis
 81:  * A - Left matrix
 82:  * B - Right matrix
 83:  * ldA - first dimension of A as declared  in the calling program
 84:  * ldB - first dimension of B as declared  in the calling program
 85:  * IsReduced - specifies if the matrices are already in the reduced form,
 86:  * i.e A is a Hessenberg matrix and B is upper triangular.
 87:  * Sr - on exit, the extracted Schur vectors corresponding
 88:  * the smallest eigenvalues (with complex conjugates)
 89:  * CurNeig - Number of extracted eigenvalues
 90:  */
 91: static PetscErrorCode KSPAGMRESSchurForm(KSP ksp, PetscBLASInt KspSize, PetscScalar *A, PetscBLASInt ldA, PetscScalar *B, PetscBLASInt ldB, PetscBool IsReduced, PetscScalar *Sr, PetscInt *CurNeig)
 92: {
 93:   KSP_AGMRES   *agmres = (KSP_AGMRES *)ksp->data;
 94:   PetscInt      max_k  = agmres->max_k;
 95:   PetscBLASInt  r;
 96:   PetscInt      neig   = agmres->neig;
 97:   PetscScalar  *wr     = agmres->wr;
 98:   PetscScalar  *wi     = agmres->wi;
 99:   PetscScalar  *beta   = agmres->beta;
100:   PetscScalar  *Q      = agmres->Q;
101:   PetscScalar  *Z      = agmres->Z;
102:   PetscScalar  *work   = agmres->work;
103:   PetscBLASInt *select = agmres->select;
104:   PetscInt     *perm   = agmres->perm;
105:   PetscBLASInt  sdim   = 0;
106:   PetscInt      i, j;
107:   PetscBLASInt  info;
108:   PetscBLASInt *iwork = agmres->iwork;
109:   PetscBLASInt  N     = MAXKSPSIZE;
110:   PetscBLASInt  lwork, liwork;
111:   PetscBLASInt  ilo, ihi;
112:   PetscBLASInt  ijob, wantQ, wantZ;
113:   PetscScalar   Dif[2];

115:   ijob  = 2;
116:   wantQ = 1;
117:   wantZ = 1;
118:   PetscBLASIntCast(PetscMax(8 * N + 16, 4 * neig * (N - neig)), &lwork);
119:   PetscBLASIntCast(2 * N * neig, &liwork);
120:   ilo = 1;
121:   PetscBLASIntCast(KspSize, &ihi);

123:   /* Compute the Schur form */
124:   if (IsReduced) { /* The eigenvalue problem is already in reduced form, meaning that A is upper Hessenberg and B is triangular */
125:     PetscCallBLAS("LAPACKhgeqz", LAPACKhgeqz_("S", "I", "I", &KspSize, &ilo, &ihi, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, work, &lwork, &info));
127:   } else {
128:     PetscCallBLAS("LAPACKgges", LAPACKgges_("V", "V", "N", NULL, &KspSize, A, &ldA, B, &ldB, &sdim, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
130:   }

132:   /* We should avoid computing these ratio...  */
133:   for (i = 0; i < KspSize; i++) {
134:     if (beta[i] != 0.0) {
135:       wr[i] /= beta[i];
136:       wi[i] /= beta[i];
137:     }
138:   }

140:   /* Sort the eigenvalues to extract the smallest ones */
141:   KSPAGMRESQuickSort(wr, wi, KspSize, perm);

143:   /* Count the number of extracted eigenvalues (with complex conjugates) */
144:   r = 0;
145:   while (r < neig) {
146:     if (wi[r] != 0) r += 2;
147:     else r += 1;
148:   }
149:   /* Reorder the Schur decomposition so that the cluster of smallest/largest eigenvalues appears in the leading diagonal blocks of A (and B)*/
150:   PetscArrayzero(select, N);
151:   if (!agmres->GreatestEig) {
152:     for (j = 0; j < r; j++) select[perm[j]] = 1;
153:   } else {
154:     for (j = 0; j < r; j++) select[perm[KspSize - j - 1]] = 1;
155:   }
156:   PetscCallBLAS("LAPACKtgsen", LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &KspSize, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, &r, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
158:   /* Extract the Schur vectors associated to the r smallest eigenvalues */
159:   PetscArrayzero(Sr, (N + 1) * r);
160:   for (j = 0; j < r; j++) {
161:     for (i = 0; i < KspSize; i++) Sr[j * (N + 1) + i] = Z[j * N + i];
162:   }

164:   /* Broadcast Sr to all other processes to have consistent data;
165:    * FIXME should investigate how to get unique Schur vectors (unique QR factorization, probably the sign of rotations) */
166:   MPI_Bcast(Sr, (N + 1) * r, MPIU_SCALAR, agmres->First, PetscObjectComm((PetscObject)ksp));
167:   /* Update the Shift values for the Newton basis. This is surely necessary when applying the DeflationPrecond */
168:   if (agmres->DeflPrecond) KSPAGMRESLejaOrdering(wr, wi, agmres->Rshift, agmres->Ishift, max_k);
169:   *CurNeig = r; /* Number of extracted eigenvalues */
170:   return 0;
171: }

173: /*
174:  * This function form the matrices for the generalized eigenvalue problem,
175:  * it then compute the Schur vectors needed to augment the Newton basis.
176:  */
177: PetscErrorCode KSPAGMRESComputeDeflationData(KSP ksp)
178: {
179:   KSP_AGMRES  *agmres  = (KSP_AGMRES *)ksp->data;
180:   Vec         *U       = agmres->U;
181:   Vec         *TmpU    = agmres->TmpU;
182:   PetscScalar *MatEigL = agmres->MatEigL;
183:   PetscScalar *MatEigR = agmres->MatEigR;
184:   PetscScalar *Sr      = agmres->Sr;
185:   PetscScalar  alpha, beta;
186:   PetscInt     i, j;
187:   PetscInt     max_k = agmres->max_k; /* size of the non - augmented subspace */
188:   PetscInt     CurNeig;               /* Current number of extracted eigenvalues */
189:   PetscInt     N = MAXKSPSIZE;
190:   PetscBLASInt bN;
191:   PetscInt     lC      = N + 1;
192:   PetscInt     KspSize = KSPSIZE;
193:   PetscBLASInt blC, bKspSize;
194:   PetscInt     PrevNeig = agmres->r;

196:   PetscLogEventBegin(KSP_AGMRESComputeDeflationData, ksp, 0, 0, 0);
197:   if (agmres->neig <= 1) return 0;
198:   /* Explicitly form MatEigL = H^T*H, It can also be formed as H^T+h_{N+1,N}H^-1e^T */
199:   alpha = 1.0;
200:   beta  = 0.0;
201:   PetscBLASIntCast(KspSize, &bKspSize);
202:   PetscBLASIntCast(lC, &blC);
203:   PetscBLASIntCast(N, &bN);
204:   PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &bKspSize, &bKspSize, &blC, &alpha, agmres->hes_origin, &blC, agmres->hes_origin, &blC, &beta, MatEigL, &bN));
205:   if (!agmres->ritz) {
206:     /* Form TmpU = V*H where V is the Newton basis orthogonalized  with roddec*/
207:     for (j = 0; j < KspSize; j++) {
208:       /* Apply the elementary reflectors (stored in Qloc) on H */
209:       KSPAGMRESRodvec(ksp, KspSize + 1, &agmres->hes_origin[j * lC], TmpU[j]);
210:     }
211:     /* Now form MatEigR = TmpU^T*W where W is [VEC_V(1:max_k); U] */
212:     for (j = 0; j < max_k; j++) VecMDot(VEC_V(j), KspSize, TmpU, &MatEigR[j * N]);
213:     for (j = max_k; j < KspSize; j++) VecMDot(U[j - max_k], KspSize, TmpU, &MatEigR[j * N]);
214:   } else { /* Form H^T */
215:     for (j = 0; j < N; j++) {
216:       for (i = 0; i < N; i++) MatEigR[j * N + i] = agmres->hes_origin[i * lC + j];
217:     }
218:   }
219:   /* Obtain the Schur form of  the generalized eigenvalue problem MatEigL*y = \lambda*MatEigR*y */
220:   if (agmres->DeflPrecond) {
221:     KSPAGMRESSchurForm(ksp, KspSize, agmres->hes_origin, lC, agmres->Rloc, lC, PETSC_TRUE, Sr, &CurNeig);
222:   } else {
223:     KSPAGMRESSchurForm(ksp, KspSize, MatEigL, N, MatEigR, N, PETSC_FALSE, Sr, &CurNeig);
224:   }

226:   if (agmres->DeflPrecond) { /* Switch to DGMRES to improve the basis of the invariant subspace associated to the deflation */
227:     agmres->HasSchur = PETSC_TRUE;
228:     KSPDGMRESComputeDeflationData(ksp, &CurNeig);
229:     return 0;
230:   }
231:   /* Form the Schur vectors in the entire subspace: U = W * Sr where W = [VEC_V(1:max_k); U]*/
232:   for (j = 0; j < PrevNeig; j++) { /* First, copy U to a temporary place */
233:     VecCopy(U[j], TmpU[j]);
234:   }

236:   for (j = 0; j < CurNeig; j++) {
237:     VecZeroEntries(U[j]);
238:     VecMAXPY(U[j], max_k, &Sr[j * (N + 1)], &VEC_V(0));
239:     VecMAXPY(U[j], PrevNeig, &Sr[j * (N + 1) + max_k], TmpU);
240:   }
241:   agmres->r = CurNeig;
242:   PetscLogEventEnd(KSP_AGMRESComputeDeflationData, ksp, 0, 0, 0);
243:   return 0;
244: }