Actual source code: sr1.c
1: #include <../src/ksp/ksp/utils/lmvm/lmvm.h>
3: /*
4: Limited-memory Symmetric-Rank-1 method for approximating both
5: the forward product and inverse application of a Jacobian.
6: */
8: typedef struct {
9: Vec *P, *Q;
10: Vec work;
11: PetscBool allocated, needP, needQ;
12: PetscReal *stp, *ytq;
13: } Mat_LSR1;
15: /*------------------------------------------------------------*/
17: /*
18: The solution method is adapted from Algorithm 8 of Erway and Marcia
19: "On Solving Large-Scale Limited-Memory Quasi-Newton Equations"
20: (https://arxiv.org/abs/1510.06378).
22: Q[i] = S[i] - (B_i)^{-1}*Y[i] terms are computed ahead of time whenever
23: the matrix is updated with a new (S[i], Y[i]) pair. This allows
24: repeated calls of MatMult inside KSP solvers without unnecessarily
25: recomputing Q[i] terms in expensive nested-loops.
27: dX <- J0^{-1} * F
28: for i = 0,1,2,...,k
29: # Q[i] = S[i] - (B_i)^{-1}*Y[i]
30: zeta = (Q[i]^T F) / (Q[i]^T Y[i])
31: dX <- dX + (zeta * Q[i])
32: end
33: */
34: static PetscErrorCode MatSolve_LMVMSR1(Mat B, Vec F, Vec dX)
35: {
36: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
37: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
38: PetscInt i, j;
39: PetscScalar qjtyi, qtf, ytq;
41: VecCheckSameSize(F, 2, dX, 3);
42: VecCheckMatCompatible(B, dX, 3, F, 2);
44: if (lsr1->needQ) {
45: /* Pre-compute (Q[i] = S[i] - (B_i)^{-1} * Y[i]) and (Y[i]^T Q[i]) */
46: for (i = 0; i <= lmvm->k; ++i) {
47: MatLMVMApplyJ0Inv(B, lmvm->Y[i], lsr1->Q[i]);
48: VecAYPX(lsr1->Q[i], -1.0, lmvm->S[i]);
49: for (j = 0; j <= i - 1; ++j) {
50: VecDot(lsr1->Q[j], lmvm->Y[i], &qjtyi);
51: VecAXPY(lsr1->Q[i], -PetscRealPart(qjtyi) / lsr1->ytq[j], lsr1->Q[j]);
52: }
53: VecDot(lmvm->Y[i], lsr1->Q[i], &ytq);
54: lsr1->ytq[i] = PetscRealPart(ytq);
55: }
56: lsr1->needQ = PETSC_FALSE;
57: }
59: /* Invert the initial Jacobian onto F (or apply scaling) */
60: MatLMVMApplyJ0Inv(B, F, dX);
61: /* Start outer loop */
62: for (i = 0; i <= lmvm->k; ++i) {
63: VecDot(lsr1->Q[i], F, &qtf);
64: VecAXPY(dX, PetscRealPart(qtf) / lsr1->ytq[i], lsr1->Q[i]);
65: }
66: return 0;
67: }
69: /*------------------------------------------------------------*/
71: /*
72: The forward product is the matrix-free implementation of
73: Equation (6.24) in Nocedal and Wright "Numerical Optimization"
74: 2nd edition, pg 144.
76: Note that the structure of the forward product is identical to
77: the solution, with S and Y exchanging roles.
79: P[i] = Y[i] - (B_i)*S[i] terms are computed ahead of time whenever
80: the matrix is updated with a new (S[i], Y[i]) pair. This allows
81: repeated calls of MatMult inside KSP solvers without unnecessarily
82: recomputing P[i] terms in expensive nested-loops.
84: Z <- J0 * X
85: for i = 0,1,2,...,k
86: # P[i] = Y[i] - (B_i)*S[i]
87: zeta = (P[i]^T X) / (P[i]^T S[i])
88: Z <- Z + (zeta * P[i])
89: end
90: */
91: static PetscErrorCode MatMult_LMVMSR1(Mat B, Vec X, Vec Z)
92: {
93: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
94: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
95: PetscInt i, j;
96: PetscScalar pjtsi, ptx, stp;
98: VecCheckSameSize(X, 2, Z, 3);
99: VecCheckMatCompatible(B, X, 2, Z, 3);
101: if (lsr1->needP) {
102: /* Pre-compute (P[i] = Y[i] - (B_i) * S[i]) and (S[i]^T P[i]) */
103: for (i = 0; i <= lmvm->k; ++i) {
104: MatLMVMApplyJ0Fwd(B, lmvm->S[i], lsr1->P[i]);
105: VecAYPX(lsr1->P[i], -1.0, lmvm->Y[i]);
106: for (j = 0; j <= i - 1; ++j) {
107: VecDot(lsr1->P[j], lmvm->S[i], &pjtsi);
108: VecAXPY(lsr1->P[i], -PetscRealPart(pjtsi) / lsr1->stp[j], lsr1->P[j]);
109: }
110: VecDot(lmvm->S[i], lsr1->P[i], &stp);
111: lsr1->stp[i] = PetscRealPart(stp);
112: }
113: lsr1->needP = PETSC_FALSE;
114: }
116: MatLMVMApplyJ0Fwd(B, X, Z);
117: for (i = 0; i <= lmvm->k; ++i) {
118: VecDot(lsr1->P[i], X, &ptx);
119: VecAXPY(Z, PetscRealPart(ptx) / lsr1->stp[i], lsr1->P[i]);
120: }
121: return 0;
122: }
124: /*------------------------------------------------------------*/
126: static PetscErrorCode MatUpdate_LMVMSR1(Mat B, Vec X, Vec F)
127: {
128: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
129: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
130: PetscReal snorm, pnorm;
131: PetscScalar sktw;
133: if (!lmvm->m) return 0;
134: if (lmvm->prev_set) {
135: /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
136: VecAYPX(lmvm->Xprev, -1.0, X);
137: VecAYPX(lmvm->Fprev, -1.0, F);
138: /* See if the updates can be accepted
139: NOTE: This tests abs(S[k]^T (Y[k] - B_k*S[k])) >= eps * norm(S[k]) * norm(Y[k] - B_k*S[k]) */
140: MatMult(B, lmvm->Xprev, lsr1->work);
141: VecAYPX(lsr1->work, -1.0, lmvm->Fprev);
142: VecDot(lmvm->Xprev, lsr1->work, &sktw);
143: VecNorm(lmvm->Xprev, NORM_2, &snorm);
144: VecNorm(lsr1->work, NORM_2, &pnorm);
145: if (PetscAbsReal(PetscRealPart(sktw)) >= lmvm->eps * snorm * pnorm) {
146: /* Update is good, accept it */
147: lsr1->needP = lsr1->needQ = PETSC_TRUE;
148: MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev);
149: } else {
150: /* Update is bad, skip it */
151: ++lmvm->nrejects;
152: }
153: }
154: /* Save the solution and function to be used in the next update */
155: VecCopy(X, lmvm->Xprev);
156: VecCopy(F, lmvm->Fprev);
157: lmvm->prev_set = PETSC_TRUE;
158: return 0;
159: }
161: /*------------------------------------------------------------*/
163: static PetscErrorCode MatCopy_LMVMSR1(Mat B, Mat M, MatStructure str)
164: {
165: Mat_LMVM *bdata = (Mat_LMVM *)B->data;
166: Mat_LSR1 *bctx = (Mat_LSR1 *)bdata->ctx;
167: Mat_LMVM *mdata = (Mat_LMVM *)M->data;
168: Mat_LSR1 *mctx = (Mat_LSR1 *)mdata->ctx;
169: PetscInt i;
171: mctx->needP = bctx->needP;
172: mctx->needQ = bctx->needQ;
173: for (i = 0; i <= bdata->k; ++i) {
174: mctx->stp[i] = bctx->stp[i];
175: mctx->ytq[i] = bctx->ytq[i];
176: VecCopy(bctx->P[i], mctx->P[i]);
177: VecCopy(bctx->Q[i], mctx->Q[i]);
178: }
179: return 0;
180: }
182: /*------------------------------------------------------------*/
184: static PetscErrorCode MatReset_LMVMSR1(Mat B, PetscBool destructive)
185: {
186: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
187: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
189: lsr1->needP = lsr1->needQ = PETSC_TRUE;
190: if (destructive && lsr1->allocated) {
191: VecDestroy(&lsr1->work);
192: PetscFree2(lsr1->stp, lsr1->ytq);
193: VecDestroyVecs(lmvm->m, &lsr1->P);
194: VecDestroyVecs(lmvm->m, &lsr1->Q);
195: lsr1->allocated = PETSC_FALSE;
196: }
197: MatReset_LMVM(B, destructive);
198: return 0;
199: }
201: /*------------------------------------------------------------*/
203: static PetscErrorCode MatAllocate_LMVMSR1(Mat B, Vec X, Vec F)
204: {
205: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
206: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
208: MatAllocate_LMVM(B, X, F);
209: if (!lsr1->allocated) {
210: VecDuplicate(X, &lsr1->work);
211: PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq);
212: if (lmvm->m > 0) {
213: VecDuplicateVecs(X, lmvm->m, &lsr1->P);
214: VecDuplicateVecs(X, lmvm->m, &lsr1->Q);
215: }
216: lsr1->allocated = PETSC_TRUE;
217: }
218: return 0;
219: }
221: /*------------------------------------------------------------*/
223: static PetscErrorCode MatDestroy_LMVMSR1(Mat B)
224: {
225: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
226: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
228: if (lsr1->allocated) {
229: VecDestroy(&lsr1->work);
230: PetscFree2(lsr1->stp, lsr1->ytq);
231: VecDestroyVecs(lmvm->m, &lsr1->P);
232: VecDestroyVecs(lmvm->m, &lsr1->Q);
233: lsr1->allocated = PETSC_FALSE;
234: }
235: PetscFree(lmvm->ctx);
236: MatDestroy_LMVM(B);
237: return 0;
238: }
240: /*------------------------------------------------------------*/
242: static PetscErrorCode MatSetUp_LMVMSR1(Mat B)
243: {
244: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
245: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
247: MatSetUp_LMVM(B);
248: if (!lsr1->allocated && lmvm->m > 0) {
249: VecDuplicate(lmvm->Xprev, &lsr1->work);
250: PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq);
251: if (lmvm->m > 0) {
252: VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->P);
253: VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->Q);
254: }
255: lsr1->allocated = PETSC_TRUE;
256: }
257: return 0;
258: }
260: /*------------------------------------------------------------*/
262: PetscErrorCode MatCreate_LMVMSR1(Mat B)
263: {
264: Mat_LMVM *lmvm;
265: Mat_LSR1 *lsr1;
267: MatCreate_LMVM(B);
268: PetscObjectChangeTypeName((PetscObject)B, MATLMVMSR1);
269: MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE);
270: B->ops->setup = MatSetUp_LMVMSR1;
271: B->ops->destroy = MatDestroy_LMVMSR1;
272: B->ops->solve = MatSolve_LMVMSR1;
274: lmvm = (Mat_LMVM *)B->data;
275: lmvm->square = PETSC_TRUE;
276: lmvm->ops->allocate = MatAllocate_LMVMSR1;
277: lmvm->ops->reset = MatReset_LMVMSR1;
278: lmvm->ops->update = MatUpdate_LMVMSR1;
279: lmvm->ops->mult = MatMult_LMVMSR1;
280: lmvm->ops->copy = MatCopy_LMVMSR1;
282: PetscNew(&lsr1);
283: lmvm->ctx = (void *)lsr1;
284: lsr1->allocated = PETSC_FALSE;
285: lsr1->needP = lsr1->needQ = PETSC_TRUE;
286: return 0;
287: }
289: /*------------------------------------------------------------*/
291: /*@
292: MatCreateLMVMSR1 - Creates a limited-memory Symmetric-Rank-1 approximation
293: matrix used for a Jacobian. L-SR1 is symmetric by construction, but is not
294: guaranteed to be positive-definite.
296: The provided local and global sizes must match the solution and function vectors
297: used with `MatLMVMUpdate()` and `MatSolve()`. The resulting L-SR1 matrix will have
298: storage vectors allocated with `VecCreateSeq()` in serial and `VecCreateMPI()` in
299: parallel. To use the L-SR1 matrix with other vector types, the matrix must be
300: created using `MatCreate()` and `MatSetType()`, followed by `MatLMVMAllocate()`.
301: This ensures that the internal storage and work vectors are duplicated from the
302: correct type of vector.
304: Collective
306: Input Parameters:
307: + comm - MPI communicator
308: . n - number of local rows for storage vectors
309: - N - global size of the storage vectors
311: Output Parameter:
312: . B - the matrix
314: Level: intermediate
316: Note:
317: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
318: paradigm instead of this routine directly.
320: .seealso: [](chapter_ksp), `MatCreate()`, `MATLMVM`, `MATLMVMSR1`, `MatCreateLMVMBFGS()`, `MatCreateLMVMDFP()`,
321: `MatCreateLMVMBrdn()`, `MatCreateLMVMBadBrdn()`, `MatCreateLMVMSymBrdn()`
322: @*/
323: PetscErrorCode MatCreateLMVMSR1(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
324: {
325: MatCreate(comm, B);
326: MatSetSizes(*B, n, n, N, N);
327: MatSetType(*B, MATLMVMSR1);
328: MatSetUp(*B);
329: return 0;
330: }