Actual source code: gmreig.c
2: #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
3: #include <petscblaslapack.h>
5: PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp, PetscReal *emax, PetscReal *emin)
6: {
7: KSP_GMRES *gmres = (KSP_GMRES *)ksp->data;
8: PetscInt n = gmres->it + 1, i, N = gmres->max_k + 2;
9: PetscBLASInt bn, bN, lwork, idummy, lierr;
10: PetscScalar *R = gmres->Rsvd, *work = R + N * N, sdummy = 0;
11: PetscReal *realpart = gmres->Dsvd;
13: PetscBLASIntCast(n, &bn);
14: PetscBLASIntCast(N, &bN);
15: PetscBLASIntCast(5 * N, &lwork);
16: PetscBLASIntCast(N, &idummy);
17: if (n <= 0) {
18: *emax = *emin = 1.0;
19: return 0;
20: }
21: /* copy R matrix to work space */
22: PetscArraycpy(R, gmres->hh_origin, (gmres->max_k + 2) * (gmres->max_k + 1));
24: /* zero below diagonal garbage */
25: for (i = 0; i < n; i++) R[i * N + i + 1] = 0.0;
27: /* compute Singular Values */
28: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
29: #if !defined(PETSC_USE_COMPLEX)
30: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &bn, &bn, R, &bN, realpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, &lierr));
31: #else
32: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &bn, &bn, R, &bN, realpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, realpart + N, &lierr));
33: #endif
35: PetscFPTrapPop();
37: *emin = realpart[n - 1];
38: *emax = realpart[0];
39: return 0;
40: }
42: PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp, PetscInt nmax, PetscReal *r, PetscReal *c, PetscInt *neig)
43: {
44: #if !defined(PETSC_USE_COMPLEX)
45: KSP_GMRES *gmres = (KSP_GMRES *)ksp->data;
46: PetscInt n = gmres->it + 1, N = gmres->max_k + 1, i, *perm;
47: PetscBLASInt bn, bN, lwork, idummy, l-1;
48: PetscScalar *R = gmres->Rsvd, *work = R + N * N;
49: PetscScalar *realpart = gmres->Dsvd, *imagpart = realpart + N, sdummy = 0;
51: PetscBLASIntCast(n, &bn);
52: PetscBLASIntCast(N, &bN);
53: PetscBLASIntCast(5 * N, &lwork);
54: PetscBLASIntCast(N, &idummy);
56: *neig = n;
58: if (!n) return 0;
60: /* copy R matrix to work space */
61: PetscArraycpy(R, gmres->hes_origin, N * N);
63: /* compute eigenvalues */
64: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
65: PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "N", &bn, R, &bN, realpart, imagpart, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, &lierr));
67: PetscFPTrapPop();
68: PetscMalloc1(n, &perm);
69: for (i = 0; i < n; i++) perm[i] = i;
70: PetscSortRealWithPermutation(n, realpart, perm);
71: for (i = 0; i < n; i++) {
72: r[i] = realpart[perm[i]];
73: c[i] = imagpart[perm[i]];
74: }
75: PetscFree(perm);
76: #else
77: KSP_GMRES *gmres = (KSP_GMRES *)ksp->data;
78: PetscInt n = gmres->it + 1, N = gmres->max_k + 1, i, *perm;
79: PetscScalar *R = gmres->Rsvd, *work = R + N * N, *eigs = work + 5 * N, sdummy;
80: PetscBLASInt bn, bN, lwork, idummy, l-1;
82: PetscBLASIntCast(n, &bn);
83: PetscBLASIntCast(N, &bN);
84: PetscBLASIntCast(5 * N, &lwork);
85: PetscBLASIntCast(N, &idummy);
87: *neig = n;
89: if (!n) return 0;
91: /* copy R matrix to work space */
92: PetscArraycpy(R, gmres->hes_origin, N * N);
94: /* compute eigenvalues */
95: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
96: PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "N", &bn, R, &bN, eigs, &sdummy, &idummy, &sdummy, &idummy, work, &lwork, gmres->Dsvd, &lierr));
98: PetscFPTrapPop();
99: PetscMalloc1(n, &perm);
100: for (i = 0; i < n; i++) perm[i] = i;
101: for (i = 0; i < n; i++) r[i] = PetscRealPart(eigs[i]);
102: PetscSortRealWithPermutation(n, r, perm);
103: for (i = 0; i < n; i++) {
104: r[i] = PetscRealPart(eigs[perm[i]]);
105: c[i] = PetscImaginaryPart(eigs[perm[i]]);
106: }
107: PetscFree(perm);
108: #endif
109: return 0;
110: }
112: PetscErrorCode KSPComputeRitz_GMRES(KSP ksp, PetscBool ritz, PetscBool small, PetscInt *nrit, Vec S[], PetscReal *tetar, PetscReal *tetai)
113: {
114: KSP_GMRES *gmres = (KSP_GMRES *)ksp->data;
115: PetscInt NbrRitz, nb = 0, n;
116: PetscInt i, j, *perm;
117: PetscScalar *H, *Q, *Ht; /* H Hessenberg matrix; Q matrix of eigenvectors of H */
118: PetscScalar *wr, *wi; /* Real and imaginary part of the Ritz values */
119: PetscScalar *SR, *work;
120: PetscReal *modul;
121: PetscBLASInt bn, bN, lwork, idummy;
122: PetscScalar *t, sdummy = 0;
123: Mat A;
125: /* Express sizes in PetscBLASInt for LAPACK routines*/
126: PetscBLASIntCast(gmres->fullcycle ? gmres->max_k : gmres->it + 1, &bn); /* size of the Hessenberg matrix */
127: PetscBLASIntCast(gmres->max_k + 1, &bN); /* LDA of the Hessenberg matrix */
128: PetscBLASIntCast(gmres->max_k + 1, &idummy);
129: PetscBLASIntCast(5 * (gmres->max_k + 1) * (gmres->max_k + 1), &lwork);
131: /* NbrRitz: number of (Harmonic) Ritz pairs to extract */
132: NbrRitz = PetscMin(*nrit, bn);
133: KSPGetOperators(ksp, &A, NULL);
134: MatGetSize(A, &n, NULL);
135: NbrRitz = PetscMin(NbrRitz, n);
137: PetscMalloc4(bN * bN, &H, bn * bn, &Q, bn, &wr, bn, &wi);
139: /* copy H matrix to work space */
140: PetscArraycpy(H, gmres->fullcycle ? gmres->hes_ritz : gmres->hes_origin, bN * bN);
142: /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
143: if (!ritz) {
144: /* Transpose the Hessenberg matrix => Ht */
145: PetscMalloc1(bn * bn, &Ht);
146: for (i = 0; i < bn; i++) {
147: for (j = 0; j < bn; j++) Ht[i * bn + j] = PetscConj(H[j * bN + i]);
148: }
149: /* Solve the system H^T*t = h^2_{m+1,m}e_m */
150: PetscCalloc1(bn, &t);
151: /* t = h^2_{m+1,m}e_m */
152: if (gmres->fullcycle) t[bn - 1] = PetscSqr(gmres->hes_ritz[(bn - 1) * bN + bn]);
153: else t[bn - 1] = PetscSqr(gmres->hes_origin[(bn - 1) * bN + bn]);
155: /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
156: {
157: PetscBLASInt info;
158: PetscBLASInt nrhs = 1;
159: PetscBLASInt *ipiv;
160: PetscMalloc1(bn, &ipiv);
161: PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
163: PetscFree(ipiv);
164: PetscFree(Ht);
165: }
166: /* Form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
167: for (i = 0; i < bn; i++) H[(bn - 1) * bn + i] += t[i];
168: PetscFree(t);
169: }
171: /*
172: Compute (Harmonic) Ritz pairs;
173: For a real Ritz eigenvector at wr(j) Q(:,j) columns contain the real right eigenvector
174: For a complex Ritz pair of eigenvectors at wr(j), wi(j), wr(j+1), and wi(j+1), Q(:,j) + i Q(:,j+1) and Q(:,j) - i Q(:,j+1) are the two eigenvectors
175: */
176: {
177: PetscBLASInt info;
178: #if defined(PETSC_USE_COMPLEX)
179: PetscReal *rwork = NULL;
180: #endif
181: PetscMalloc1(lwork, &work);
182: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
183: #if !defined(PETSC_USE_COMPLEX)
184: PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "V", &bn, H, &bN, wr, wi, &sdummy, &idummy, Q, &bn, work, &lwork, &info));
185: #else
186: PetscMalloc1(2 * n, &rwork);
187: PetscCallBLAS("LAPACKgeev", LAPACKgeev_("N", "V", &bn, H, &bN, wr, &sdummy, &idummy, Q, &bn, work, &lwork, rwork, &info));
188: PetscFree(rwork);
189: #endif
191: PetscFPTrapPop();
192: PetscFree(work);
193: }
194: /* sort the (Harmonic) Ritz values */
195: PetscMalloc2(bn, &modul, bn, &perm);
196: #if defined(PETSC_USE_COMPLEX)
197: for (i = 0; i < bn; i++) modul[i] = PetscAbsScalar(wr[i]);
198: #else
199: for (i = 0; i < bn; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
200: #endif
201: for (i = 0; i < bn; i++) perm[i] = i;
202: PetscSortRealWithPermutation(bn, modul, perm);
204: #if defined(PETSC_USE_COMPLEX)
205: /* sort extracted (Harmonic) Ritz pairs */
206: nb = NbrRitz;
207: PetscMalloc1(nb * bn, &SR);
208: for (i = 0; i < nb; i++) {
209: if (small) {
210: tetar[i] = PetscRealPart(wr[perm[i]]);
211: tetai[i] = PetscImaginaryPart(wr[perm[i]]);
212: PetscArraycpy(&SR[i * bn], &(Q[perm[i] * bn]), bn);
213: } else {
214: tetar[i] = PetscRealPart(wr[perm[bn - nb + i]]);
215: tetai[i] = PetscImaginaryPart(wr[perm[bn - nb + i]]);
216: PetscArraycpy(&SR[i * bn], &(Q[perm[bn - nb + i] * bn]), bn); /* permute columns of Q */
217: }
218: }
219: #else
220: /* count the number of extracted (Harmonic) Ritz pairs (with complex conjugates) */
221: if (small) {
222: while (nb < NbrRitz) {
223: if (!wi[perm[nb]]) nb += 1;
224: else {
225: if (nb < NbrRitz - 1) nb += 2;
226: else break;
227: }
228: }
229: PetscMalloc1(nb * bn, &SR);
230: for (i = 0; i < nb; i++) {
231: tetar[i] = wr[perm[i]];
232: tetai[i] = wi[perm[i]];
233: PetscArraycpy(&SR[i * bn], &(Q[perm[i] * bn]), bn);
234: }
235: } else {
236: while (nb < NbrRitz) {
237: if (wi[perm[bn - nb - 1]] == 0) nb += 1;
238: else {
239: if (nb < NbrRitz - 1) nb += 2;
240: else break;
241: }
242: }
243: PetscMalloc1(nb * bn, &SR); /* bn rows, nb columns */
244: for (i = 0; i < nb; i++) {
245: tetar[i] = wr[perm[bn - nb + i]];
246: tetai[i] = wi[perm[bn - nb + i]];
247: PetscArraycpy(&SR[i * bn], &(Q[perm[bn - nb + i] * bn]), bn); /* permute columns of Q */
248: }
249: }
250: #endif
251: PetscFree2(modul, perm);
252: PetscFree4(H, Q, wr, wi);
254: /* Form the (Harmonic) Ritz vectors S = SR*V, columns of VV correspond to the basis of the Krylov subspace */
255: for (j = 0; j < nb; j++) {
256: VecZeroEntries(S[j]);
257: VecMAXPY(S[j], bn, &SR[j * bn], gmres->fullcycle ? gmres->vecb : &VEC_VV(0));
258: }
260: PetscFree(SR);
261: *nrit = nb;
262: return 0;
263: }