Actual source code: sr1.c

  1: #include <../src/ksp/ksp/utils/lmvm/lmvm.h>

  3: /*
  4:   Limited-memory Symmetric-Rank-1 method for approximating both
  5:   the forward product and inverse application of a Jacobian.
  6: */

  8: typedef struct {
  9:   Vec       *P, *Q;
 10:   Vec        work;
 11:   PetscBool  allocated, needP, needQ;
 12:   PetscReal *stp, *ytq;
 13: } Mat_LSR1;

 15: /*------------------------------------------------------------*/

 17: /*
 18:   The solution method is adapted from Algorithm 8 of Erway and Marcia
 19:   "On Solving Large-Scale Limited-Memory Quasi-Newton Equations"
 20:   (https://arxiv.org/abs/1510.06378).

 22:   Q[i] = S[i] - (B_i)^{-1}*Y[i] terms are computed ahead of time whenever
 23:   the matrix is updated with a new (S[i], Y[i]) pair. This allows
 24:   repeated calls of MatMult inside KSP solvers without unnecessarily
 25:   recomputing Q[i] terms in expensive nested-loops.

 27:   dX <- J0^{-1} * F
 28:   for i = 0,1,2,...,k
 29:     # Q[i] = S[i] - (B_i)^{-1}*Y[i]
 30:     zeta = (Q[i]^T F) / (Q[i]^T Y[i])
 31:     dX <- dX + (zeta * Q[i])
 32:   end
 33: */
 34: static PetscErrorCode MatSolve_LMVMSR1(Mat B, Vec F, Vec dX)
 35: {
 36:   Mat_LMVM   *lmvm = (Mat_LMVM *)B->data;
 37:   Mat_LSR1   *lsr1 = (Mat_LSR1 *)lmvm->ctx;
 38:   PetscInt    i, j;
 39:   PetscScalar qjtyi, qtf, ytq;

 41:   VecCheckSameSize(F, 2, dX, 3);
 42:   VecCheckMatCompatible(B, dX, 3, F, 2);

 44:   if (lsr1->needQ) {
 45:     /* Pre-compute (Q[i] = S[i] - (B_i)^{-1} * Y[i]) and (Y[i]^T Q[i]) */
 46:     for (i = 0; i <= lmvm->k; ++i) {
 47:       MatLMVMApplyJ0Inv(B, lmvm->Y[i], lsr1->Q[i]);
 48:       VecAYPX(lsr1->Q[i], -1.0, lmvm->S[i]);
 49:       for (j = 0; j <= i - 1; ++j) {
 50:         VecDot(lsr1->Q[j], lmvm->Y[i], &qjtyi);
 51:         VecAXPY(lsr1->Q[i], -PetscRealPart(qjtyi) / lsr1->ytq[j], lsr1->Q[j]);
 52:       }
 53:       VecDot(lmvm->Y[i], lsr1->Q[i], &ytq);
 54:       lsr1->ytq[i] = PetscRealPart(ytq);
 55:     }
 56:     lsr1->needQ = PETSC_FALSE;
 57:   }

 59:   /* Invert the initial Jacobian onto F (or apply scaling) */
 60:   MatLMVMApplyJ0Inv(B, F, dX);
 61:   /* Start outer loop */
 62:   for (i = 0; i <= lmvm->k; ++i) {
 63:     VecDot(lsr1->Q[i], F, &qtf);
 64:     VecAXPY(dX, PetscRealPart(qtf) / lsr1->ytq[i], lsr1->Q[i]);
 65:   }
 66:   return 0;
 67: }

 69: /*------------------------------------------------------------*/

 71: /*
 72:   The forward product is the matrix-free implementation of
 73:   Equation (6.24) in Nocedal and Wright "Numerical Optimization"
 74:   2nd edition, pg 144.

 76:   Note that the structure of the forward product is identical to
 77:   the solution, with S and Y exchanging roles.

 79:   P[i] = Y[i] - (B_i)*S[i] terms are computed ahead of time whenever
 80:   the matrix is updated with a new (S[i], Y[i]) pair. This allows
 81:   repeated calls of MatMult inside KSP solvers without unnecessarily
 82:   recomputing P[i] terms in expensive nested-loops.

 84:   Z <- J0 * X
 85:   for i = 0,1,2,...,k
 86:     # P[i] = Y[i] - (B_i)*S[i]
 87:     zeta = (P[i]^T X) / (P[i]^T S[i])
 88:     Z <- Z + (zeta * P[i])
 89:   end
 90: */
 91: static PetscErrorCode MatMult_LMVMSR1(Mat B, Vec X, Vec Z)
 92: {
 93:   Mat_LMVM   *lmvm = (Mat_LMVM *)B->data;
 94:   Mat_LSR1   *lsr1 = (Mat_LSR1 *)lmvm->ctx;
 95:   PetscInt    i, j;
 96:   PetscScalar pjtsi, ptx, stp;

 98:   VecCheckSameSize(X, 2, Z, 3);
 99:   VecCheckMatCompatible(B, X, 2, Z, 3);

101:   if (lsr1->needP) {
102:     /* Pre-compute (P[i] = Y[i] - (B_i) * S[i]) and (S[i]^T P[i]) */
103:     for (i = 0; i <= lmvm->k; ++i) {
104:       MatLMVMApplyJ0Fwd(B, lmvm->S[i], lsr1->P[i]);
105:       VecAYPX(lsr1->P[i], -1.0, lmvm->Y[i]);
106:       for (j = 0; j <= i - 1; ++j) {
107:         VecDot(lsr1->P[j], lmvm->S[i], &pjtsi);
108:         VecAXPY(lsr1->P[i], -PetscRealPart(pjtsi) / lsr1->stp[j], lsr1->P[j]);
109:       }
110:       VecDot(lmvm->S[i], lsr1->P[i], &stp);
111:       lsr1->stp[i] = PetscRealPart(stp);
112:     }
113:     lsr1->needP = PETSC_FALSE;
114:   }

116:   MatLMVMApplyJ0Fwd(B, X, Z);
117:   for (i = 0; i <= lmvm->k; ++i) {
118:     VecDot(lsr1->P[i], X, &ptx);
119:     VecAXPY(Z, PetscRealPart(ptx) / lsr1->stp[i], lsr1->P[i]);
120:   }
121:   return 0;
122: }

124: /*------------------------------------------------------------*/

126: static PetscErrorCode MatUpdate_LMVMSR1(Mat B, Vec X, Vec F)
127: {
128:   Mat_LMVM   *lmvm = (Mat_LMVM *)B->data;
129:   Mat_LSR1   *lsr1 = (Mat_LSR1 *)lmvm->ctx;
130:   PetscReal   snorm, pnorm;
131:   PetscScalar sktw;

133:   if (!lmvm->m) return 0;
134:   if (lmvm->prev_set) {
135:     /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
136:     VecAYPX(lmvm->Xprev, -1.0, X);
137:     VecAYPX(lmvm->Fprev, -1.0, F);
138:     /* See if the updates can be accepted
139:        NOTE: This tests abs(S[k]^T (Y[k] - B_k*S[k])) >= eps * norm(S[k]) * norm(Y[k] - B_k*S[k]) */
140:     MatMult(B, lmvm->Xprev, lsr1->work);
141:     VecAYPX(lsr1->work, -1.0, lmvm->Fprev);
142:     VecDot(lmvm->Xprev, lsr1->work, &sktw);
143:     VecNorm(lmvm->Xprev, NORM_2, &snorm);
144:     VecNorm(lsr1->work, NORM_2, &pnorm);
145:     if (PetscAbsReal(PetscRealPart(sktw)) >= lmvm->eps * snorm * pnorm) {
146:       /* Update is good, accept it */
147:       lsr1->needP = lsr1->needQ = PETSC_TRUE;
148:       MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev);
149:     } else {
150:       /* Update is bad, skip it */
151:       ++lmvm->nrejects;
152:     }
153:   }
154:   /* Save the solution and function to be used in the next update */
155:   VecCopy(X, lmvm->Xprev);
156:   VecCopy(F, lmvm->Fprev);
157:   lmvm->prev_set = PETSC_TRUE;
158:   return 0;
159: }

161: /*------------------------------------------------------------*/

163: static PetscErrorCode MatCopy_LMVMSR1(Mat B, Mat M, MatStructure str)
164: {
165:   Mat_LMVM *bdata = (Mat_LMVM *)B->data;
166:   Mat_LSR1 *bctx  = (Mat_LSR1 *)bdata->ctx;
167:   Mat_LMVM *mdata = (Mat_LMVM *)M->data;
168:   Mat_LSR1 *mctx  = (Mat_LSR1 *)mdata->ctx;
169:   PetscInt  i;

171:   mctx->needP = bctx->needP;
172:   mctx->needQ = bctx->needQ;
173:   for (i = 0; i <= bdata->k; ++i) {
174:     mctx->stp[i] = bctx->stp[i];
175:     mctx->ytq[i] = bctx->ytq[i];
176:     VecCopy(bctx->P[i], mctx->P[i]);
177:     VecCopy(bctx->Q[i], mctx->Q[i]);
178:   }
179:   return 0;
180: }

182: /*------------------------------------------------------------*/

184: static PetscErrorCode MatReset_LMVMSR1(Mat B, PetscBool destructive)
185: {
186:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
187:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

189:   lsr1->needP = lsr1->needQ = PETSC_TRUE;
190:   if (destructive && lsr1->allocated) {
191:     VecDestroy(&lsr1->work);
192:     PetscFree2(lsr1->stp, lsr1->ytq);
193:     VecDestroyVecs(lmvm->m, &lsr1->P);
194:     VecDestroyVecs(lmvm->m, &lsr1->Q);
195:     lsr1->allocated = PETSC_FALSE;
196:   }
197:   MatReset_LMVM(B, destructive);
198:   return 0;
199: }

201: /*------------------------------------------------------------*/

203: static PetscErrorCode MatAllocate_LMVMSR1(Mat B, Vec X, Vec F)
204: {
205:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
206:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

208:   MatAllocate_LMVM(B, X, F);
209:   if (!lsr1->allocated) {
210:     VecDuplicate(X, &lsr1->work);
211:     PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq);
212:     if (lmvm->m > 0) {
213:       VecDuplicateVecs(X, lmvm->m, &lsr1->P);
214:       VecDuplicateVecs(X, lmvm->m, &lsr1->Q);
215:     }
216:     lsr1->allocated = PETSC_TRUE;
217:   }
218:   return 0;
219: }

221: /*------------------------------------------------------------*/

223: static PetscErrorCode MatDestroy_LMVMSR1(Mat B)
224: {
225:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
226:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

228:   if (lsr1->allocated) {
229:     VecDestroy(&lsr1->work);
230:     PetscFree2(lsr1->stp, lsr1->ytq);
231:     VecDestroyVecs(lmvm->m, &lsr1->P);
232:     VecDestroyVecs(lmvm->m, &lsr1->Q);
233:     lsr1->allocated = PETSC_FALSE;
234:   }
235:   PetscFree(lmvm->ctx);
236:   MatDestroy_LMVM(B);
237:   return 0;
238: }

240: /*------------------------------------------------------------*/

242: static PetscErrorCode MatSetUp_LMVMSR1(Mat B)
243: {
244:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
245:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

247:   MatSetUp_LMVM(B);
248:   if (!lsr1->allocated && lmvm->m > 0) {
249:     VecDuplicate(lmvm->Xprev, &lsr1->work);
250:     PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq);
251:     if (lmvm->m > 0) {
252:       VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->P);
253:       VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->Q);
254:     }
255:     lsr1->allocated = PETSC_TRUE;
256:   }
257:   return 0;
258: }

260: /*------------------------------------------------------------*/

262: PetscErrorCode MatCreate_LMVMSR1(Mat B)
263: {
264:   Mat_LMVM *lmvm;
265:   Mat_LSR1 *lsr1;

267:   MatCreate_LMVM(B);
268:   PetscObjectChangeTypeName((PetscObject)B, MATLMVMSR1);
269:   MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE);
270:   B->ops->setup   = MatSetUp_LMVMSR1;
271:   B->ops->destroy = MatDestroy_LMVMSR1;
272:   B->ops->solve   = MatSolve_LMVMSR1;

274:   lmvm                = (Mat_LMVM *)B->data;
275:   lmvm->square        = PETSC_TRUE;
276:   lmvm->ops->allocate = MatAllocate_LMVMSR1;
277:   lmvm->ops->reset    = MatReset_LMVMSR1;
278:   lmvm->ops->update   = MatUpdate_LMVMSR1;
279:   lmvm->ops->mult     = MatMult_LMVMSR1;
280:   lmvm->ops->copy     = MatCopy_LMVMSR1;

282:   PetscNew(&lsr1);
283:   lmvm->ctx       = (void *)lsr1;
284:   lsr1->allocated = PETSC_FALSE;
285:   lsr1->needP = lsr1->needQ = PETSC_TRUE;
286:   return 0;
287: }

289: /*------------------------------------------------------------*/

291: /*@
292:    MatCreateLMVMSR1 - Creates a limited-memory Symmetric-Rank-1 approximation
293:    matrix used for a Jacobian. L-SR1 is symmetric by construction, but is not
294:    guaranteed to be positive-definite.

296:    The provided local and global sizes must match the solution and function vectors
297:    used with `MatLMVMUpdate()` and `MatSolve()`. The resulting L-SR1 matrix will have
298:    storage vectors allocated with `VecCreateSeq()` in serial and `VecCreateMPI()` in
299:    parallel. To use the L-SR1 matrix with other vector types, the matrix must be
300:    created using `MatCreate()` and `MatSetType()`, followed by `MatLMVMAllocate()`.
301:    This ensures that the internal storage and work vectors are duplicated from the
302:    correct type of vector.

304:    Collective

306:    Input Parameters:
307: +  comm - MPI communicator
308: .  n - number of local rows for storage vectors
309: -  N - global size of the storage vectors

311:    Output Parameter:
312: .  B - the matrix

314:    Level: intermediate

316:    Note:
317:    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
318:    paradigm instead of this routine directly.

320: .seealso: [](chapter_ksp), `MatCreate()`, `MATLMVM`, `MATLMVMSR1`, `MatCreateLMVMBFGS()`, `MatCreateLMVMDFP()`,
321:           `MatCreateLMVMBrdn()`, `MatCreateLMVMBadBrdn()`, `MatCreateLMVMSymBrdn()`
322: @*/
323: PetscErrorCode MatCreateLMVMSR1(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
324: {
325:   MatCreate(comm, B);
326:   MatSetSizes(*B, n, n, N, N);
327:   MatSetType(*B, MATLMVMSR1);
328:   MatSetUp(*B);
329:   return 0;
330: }