Actual source code: gmreig.c


  2: #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
  3: #include <petscblaslapack.h>

  5: PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp, PetscReal *emax, PetscReal *emin)
  6: {
  7:   KSP_GMRES   *gmres = (KSP_GMRES *)ksp->data;
  8:   PetscInt     n = gmres->it + 1, i, N = gmres->max_k + 2;
  9:   PetscBLASInt bn, bN, lwork, idummy, lierr;
 10:   PetscScalar *R = gmres->Rsvd, *work = R + N * N, sdummy = 0;
 11:   PetscReal   *realpart = gmres->Dsvd;

 13:   PetscBLASIntCast(n, &bn);
 14:   PetscBLASIntCast(N, &bN);
 15:   PetscBLASIntCast(5 * N, &lwork);
 16:   PetscBLASIntCast(N, &idummy);
 17:   if (n <= 0) {
 18:     *emax = *emin = 1.0;
 19:     return 0;
 20:   }
 21:   /* copy R matrix to work space */
 22:   PetscArraycpy(R, gmres->hh_origin, (gmres->max_k + 2) * (gmres->max_k + 1));

 24:   /* zero below diagonal garbage */
 25:   for (i = 0; i < n; i++) R[i * N + i + 1] = 0.0;

 27:   /* compute Singular Values */
 28:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 29: #if !defined(PETSC_USE_COMPLEX)
 30:   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &bn, &bn, R, &bN, realpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, &lierr));
 31: #else
 32:   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &bn, &bn, R, &bN, realpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, realpart + N, &lierr));
 33: #endif
 35:   PetscFPTrapPop();

 37:   *emin = realpart[n - 1];
 38:   *emax = realpart[0];
 39:   return 0;
 40: }

 42: PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp, PetscInt nmax, PetscReal *r, PetscReal *c, PetscInt *neig)
 43: {
 44: #if !defined(PETSC_USE_COMPLEX)
 45:   KSP_GMRES   *gmres = (KSP_GMRES *)ksp->data;
 46:   PetscInt     n = gmres->it + 1, N = gmres->max_k + 1, i, *perm;
 47:   PetscBLASInt bn, bN, lwork, idummy, l-1;
 48:   PetscScalar *R = gmres->Rsvd, *work = R + N * N;
 49:   PetscScalar *realpart = gmres->Dsvd, *imagpart = realpart + N, sdummy = 0;

 51:   PetscBLASIntCast(n, &bn);
 52:   PetscBLASIntCast(N, &bN);
 53:   PetscBLASIntCast(5 * N, &lwork);
 54:   PetscBLASIntCast(N, &idummy);
 56:   *neig = n;

 58:   if (!n) return 0;

 60:   /* copy R matrix to work space */
 61:   PetscArraycpy(R, gmres->hes_origin, N * N);

 63:   /* compute eigenvalues */
 64:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 65:   PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "N", &bn, R, &bN, realpart, imagpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, &lierr));
 67:   PetscFPTrapPop();
 68:   PetscMalloc1(n, &perm);
 69:   for (i = 0; i < n; i++) perm[i] = i;
 70:   PetscSortRealWithPermutation(n, realpart, perm);
 71:   for (i = 0; i < n; i++) {
 72:     r[i] = realpart[perm[i]];
 73:     c[i] = imagpart[perm[i]];
 74:   }
 75:   PetscFree(perm);
 76: #else
 77:   KSP_GMRES   *gmres = (KSP_GMRES *)ksp->data;
 78:   PetscInt     n = gmres->it + 1, N = gmres->max_k + 1, i, *perm;
 79:   PetscScalar *R = gmres->Rsvd, *work = R + N * N, *eigs = work + 5 * N, sdummy;
 80:   PetscBLASInt bn, bN, lwork, idummy, l-1;

 82:   PetscBLASIntCast(n, &bn);
 83:   PetscBLASIntCast(N, &bN);
 84:   PetscBLASIntCast(5 * N, &lwork);
 85:   PetscBLASIntCast(N, &idummy);
 87:   *neig = n;

 89:   if (!n) return 0;

 91:   /* copy R matrix to work space */
 92:   PetscArraycpy(R, gmres->hes_origin, N * N);

 94:   /* compute eigenvalues */
 95:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 96:   PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "N", &bn, R, &bN, eigs, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, gmres->Dsvd, &lierr));
 98:   PetscFPTrapPop();
 99:   PetscMalloc1(n, &perm);
100:   for (i = 0; i < n; i++) perm[i] = i;
101:   for (i = 0; i < n; i++) r[i] = PetscRealPart(eigs[i]);
102:   PetscSortRealWithPermutation(n, r, perm);
103:   for (i = 0; i < n; i++) {
104:     r[i] = PetscRealPart(eigs[perm[i]]);
105:     c[i] = PetscImaginaryPart(eigs[perm[i]]);
106:   }
107:   PetscFree(perm);
108: #endif
109:   return 0;
110: }

112: PetscErrorCode KSPComputeRitz_GMRES(KSP ksp, PetscBool ritz, PetscBool small, PetscInt *nrit, Vec S[], PetscReal *tetar, PetscReal *tetai)
113: {
114:   KSP_GMRES   *gmres = (KSP_GMRES *)ksp->data;
115:   PetscInt     NbrRitz, nb = 0, n;
116:   PetscInt     i, j, *perm;
117:   PetscScalar *H, *Q, *Ht; /* H Hessenberg matrix; Q matrix of eigenvectors of H */
118:   PetscScalar *wr, *wi;    /* Real and imaginary part of the Ritz values */
119:   PetscScalar *SR, *work;
120:   PetscReal   *modul;
121:   PetscBLASInt bn, bN, lwork, idummy;
122:   PetscScalar *t, sdummy = 0;
123:   Mat          A;

125:   /* Express sizes in PetscBLASInt for LAPACK routines*/
126:   PetscBLASIntCast(gmres->fullcycle ? gmres->max_k : gmres->it + 1, &bn); /* size of the Hessenberg matrix */
127:   PetscBLASIntCast(gmres->max_k + 1, &bN);                                /* LDA of the Hessenberg matrix */
128:   PetscBLASIntCast(gmres->max_k + 1, &idummy);
129:   PetscBLASIntCast(5 * (gmres->max_k + 1) * (gmres->max_k + 1), &lwork);

131:   /* NbrRitz: number of (Harmonic) Ritz pairs to extract */
132:   NbrRitz = PetscMin(*nrit, bn);
133:   KSPGetOperators(ksp, &A, NULL);
134:   MatGetSize(A, &n, NULL);
135:   NbrRitz = PetscMin(NbrRitz, n);

137:   PetscMalloc4(bN * bN, &H, bn * bn, &Q, bn, &wr, bn, &wi);

139:   /* copy H matrix to work space */
140:   PetscArraycpy(H, gmres->fullcycle ? gmres->hes_ritz : gmres->hes_origin, bN * bN);

142:   /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
143:   if (!ritz) {
144:     /* Transpose the Hessenberg matrix => Ht */
145:     PetscMalloc1(bn * bn, &Ht);
146:     for (i = 0; i < bn; i++) {
147:       for (j = 0; j < bn; j++) Ht[i * bn + j] = PetscConj(H[j * bN + i]);
148:     }
149:     /* Solve the system H^T*t = h^2_{m+1,m}e_m */
150:     PetscCalloc1(bn, &t);
151:     /* t = h^2_{m+1,m}e_m */
152:     if (gmres->fullcycle) t[bn - 1] = PetscSqr(gmres->hes_ritz[(bn - 1) * bN + bn]);
153:     else t[bn - 1] = PetscSqr(gmres->hes_origin[(bn - 1) * bN + bn]);

155:     /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
156:     {
157:       PetscBLASInt  info;
158:       PetscBLASInt  nrhs = 1;
159:       PetscBLASInt *ipiv;
160:       PetscMalloc1(bn, &ipiv);
161:       PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
163:       PetscFree(ipiv);
164:       PetscFree(Ht);
165:     }
166:     /* Form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
167:     for (i = 0; i < bn; i++) H[(bn - 1) * bn + i] += t[i];
168:     PetscFree(t);
169:   }

171:   /*
172:     Compute (Harmonic) Ritz pairs;
173:     For a real Ritz eigenvector at wr(j)  Q(:,j) columns contain the real right eigenvector
174:     For a complex Ritz pair of eigenvectors at wr(j), wi(j), wr(j+1), and wi(j+1), Q(:,j) + i Q(:,j+1) and Q(:,j) - i Q(:,j+1) are the two eigenvectors
175:   */
176:   {
177:     PetscBLASInt info;
178: #if defined(PETSC_USE_COMPLEX)
179:     PetscReal *rwork = NULL;
180: #endif
181:     PetscMalloc1(lwork, &work);
182:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
183: #if !defined(PETSC_USE_COMPLEX)
184:     PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "V", &bn, H, &bN, wr, wi, &sdummy, &idummy, Q, &bn, work, &lwork, &info));
185: #else
186:     PetscMalloc1(2 * n, &rwork);
187:     PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "V", &bn, H, &bN, wr, &sdummy, &idummy, Q, &bn, work, &lwork, rwork, &info));
188:     PetscFree(rwork);
189: #endif
191:     PetscFPTrapPop();
192:     PetscFree(work);
193:   }
194:   /* sort the (Harmonic) Ritz values */
195:   PetscMalloc2(bn, &modul, bn, &perm);
196: #if defined(PETSC_USE_COMPLEX)
197:   for (i = 0; i < bn; i++) modul[i] = PetscAbsScalar(wr[i]);
198: #else
199:   for (i = 0; i < bn; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
200: #endif
201:   for (i = 0; i < bn; i++) perm[i] = i;
202:   PetscSortRealWithPermutation(bn, modul, perm);

204: #if defined(PETSC_USE_COMPLEX)
205:   /* sort extracted (Harmonic) Ritz pairs */
206:   nb = NbrRitz;
207:   PetscMalloc1(nb * bn, &SR);
208:   for (i = 0; i < nb; i++) {
209:     if (small) {
210:       tetar[i] = PetscRealPart(wr[perm[i]]);
211:       tetai[i] = PetscImaginaryPart(wr[perm[i]]);
212:       PetscArraycpy(&SR[i * bn], &(Q[perm[i] * bn]), bn);
213:     } else {
214:       tetar[i] = PetscRealPart(wr[perm[bn - nb + i]]);
215:       tetai[i] = PetscImaginaryPart(wr[perm[bn - nb + i]]);
216:       PetscArraycpy(&SR[i * bn], &(Q[perm[bn - nb + i] * bn]), bn); /* permute columns of Q */
217:     }
218:   }
219: #else
220:   /* count the number of extracted (Harmonic) Ritz pairs (with complex conjugates) */
221:   if (small) {
222:     while (nb < NbrRitz) {
223:       if (!wi[perm[nb]]) nb += 1;
224:       else {
225:         if (nb < NbrRitz - 1) nb += 2;
226:         else break;
227:       }
228:     }
229:     PetscMalloc1(nb * bn, &SR);
230:     for (i = 0; i < nb; i++) {
231:       tetar[i] = wr[perm[i]];
232:       tetai[i] = wi[perm[i]];
233:       PetscArraycpy(&SR[i * bn], &(Q[perm[i] * bn]), bn);
234:     }
235:   } else {
236:     while (nb < NbrRitz) {
237:       if (wi[perm[bn - nb - 1]] == 0) nb += 1;
238:       else {
239:         if (nb < NbrRitz - 1) nb += 2;
240:         else break;
241:       }
242:     }
243:     PetscMalloc1(nb * bn, &SR); /* bn rows, nb columns */
244:     for (i = 0; i < nb; i++) {
245:       tetar[i] = wr[perm[bn - nb + i]];
246:       tetai[i] = wi[perm[bn - nb + i]];
247:       PetscArraycpy(&SR[i * bn], &(Q[perm[bn - nb + i] * bn]), bn); /* permute columns of Q */
248:     }
249:   }
250: #endif
251:   PetscFree2(modul, perm);
252:   PetscFree4(H, Q, wr, wi);

254:   /* Form the (Harmonic) Ritz vectors S = SR*V, columns of VV correspond to the basis of the Krylov subspace */
255:   for (j = 0; j < nb; j++) {
256:     VecZeroEntries(S[j]);
257:     VecMAXPY(S[j], bn, &SR[j * bn], gmres->fullcycle ? gmres->vecb : &VEC_VV(0));
258:   }

260:   PetscFree(SR);
261:   *nrit = nb;
262:   return 0;
263: }