Actual source code: pounders.c

  1: #include <../src/tao/leastsquares/impls/pounders/pounders.h>

  3: static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, void *ctx)
  4: {
  5:   return 0;
  6: }

  8: static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, void *ctx)
  9: {
 10:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx;
 11:   PetscReal     d1, d2;

 13:   /* g = A*x  (add b later)*/
 14:   MatMult(mfqP->subH, x, g);

 16:   /* f = 1/2 * x'*(Ax) + b'*x  */
 17:   VecDot(x, g, &d1);
 18:   VecDot(mfqP->subb, x, &d2);
 19:   *f = 0.5 * d1 + d2;

 21:   /* now  g = g + b */
 22:   VecAXPY(g, 1.0, mfqP->subb);
 23:   return 0;
 24: }

 26: static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum)
 27: {
 28:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
 29:   PetscInt      i, row, col;
 30:   PetscReal     fr, fc;

 32:   TaoComputeResidual(tao, x, F);
 33:   if (tao->res_weights_v) {
 34:     VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F);
 35:     VecDot(mfqP->workfvec, mfqP->workfvec, fsum);
 36:   } else if (tao->res_weights_w) {
 37:     *fsum = 0;
 38:     for (i = 0; i < tao->res_weights_n; i++) {
 39:       row = tao->res_weights_rows[i];
 40:       col = tao->res_weights_cols[i];
 41:       VecGetValues(F, 1, &row, &fr);
 42:       VecGetValues(F, 1, &col, &fc);
 43:       *fsum += tao->res_weights_w[i] * fc * fr;
 44:     }
 45:   } else {
 46:     VecDot(F, F, fsum);
 47:   }
 48:   PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum);
 50:   return 0;
 51: }

 53: static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin)
 54: {
 55: #if defined(PETSC_USE_REAL_SINGLE)
 56:   PetscReal atol = 1.0e-5;
 57: #else
 58:   PetscReal atol = 1.0e-10;
 59: #endif
 60:   PetscInt      info, its;
 61:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;

 63:   if (!mfqP->usegqt) {
 64:     PetscReal maxval;
 65:     PetscInt  i, j;

 67:     VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES);
 68:     VecAssemblyBegin(mfqP->subb);
 69:     VecAssemblyEnd(mfqP->subb);

 71:     VecSet(mfqP->subx, 0.0);

 73:     VecSet(mfqP->subndel, -1.0);
 74:     VecSet(mfqP->subpdel, +1.0);

 76:     /* Complete the lower triangle of the Hessian matrix */
 77:     for (i = 0; i < mfqP->n; i++) {
 78:       for (j = i + 1; j < mfqP->n; j++) mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i];
 79:     }
 80:     MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES);
 81:     MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY);
 82:     MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY);

 84:     TaoResetStatistics(mfqP->subtao);
 85:     /* TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_DEFAULT); */
 86:     /* enforce bound constraints -- experimental */
 87:     if (tao->XU && tao->XL) {
 88:       VecCopy(tao->XU, mfqP->subxu);
 89:       VecAXPY(mfqP->subxu, -1.0, tao->solution);
 90:       VecScale(mfqP->subxu, 1.0 / mfqP->delta);
 91:       VecCopy(tao->XL, mfqP->subxl);
 92:       VecAXPY(mfqP->subxl, -1.0, tao->solution);
 93:       VecScale(mfqP->subxl, 1.0 / mfqP->delta);

 95:       VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel);
 96:       VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel);
 97:     } else {
 98:       VecCopy(mfqP->subpdel, mfqP->subxu);
 99:       VecCopy(mfqP->subndel, mfqP->subxl);
100:     }
101:     /* Make sure xu > xl */
102:     VecCopy(mfqP->subxl, mfqP->subpdel);
103:     VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu);
104:     VecMax(mfqP->subpdel, NULL, &maxval);
106:     /* Make sure xu > tao->solution > xl */
107:     VecCopy(mfqP->subxl, mfqP->subpdel);
108:     VecAXPY(mfqP->subpdel, -1.0, mfqP->subx);
109:     VecMax(mfqP->subpdel, NULL, &maxval);

112:     VecCopy(mfqP->subx, mfqP->subpdel);
113:     VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu);
114:     VecMax(mfqP->subpdel, NULL, &maxval);

117:     TaoSolve(mfqP->subtao);
118:     TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL);

120:     /* test bounds post-solution*/
121:     VecCopy(mfqP->subxl, mfqP->subpdel);
122:     VecAXPY(mfqP->subpdel, -1.0, mfqP->subx);
123:     VecMax(mfqP->subpdel, NULL, &maxval);
124:     if (maxval > 1e-5) {
125:       PetscInfo(tao, "subproblem solution < lower bound\n");
126:       tao->reason = TAO_DIVERGED_TR_REDUCTION;
127:     }

129:     VecCopy(mfqP->subx, mfqP->subpdel);
130:     VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu);
131:     VecMax(mfqP->subpdel, NULL, &maxval);
132:     if (maxval > 1e-5) {
133:       PetscInfo(tao, "subproblem solution > upper bound\n");
134:       tao->reason = TAO_DIVERGED_TR_REDUCTION;
135:     }
136:   } else {
137:     gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3);
138:   }
139:   *qmin *= -1;
140:   return 0;
141: }

143: static PetscErrorCode pounders_update_res(Tao tao)
144: {
145:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
146:   PetscInt      i, row, col;
147:   PetscBLASInt  blasn = mfqP->n, blasn2 = blasn * blasn, blasm = mfqP->m, ione = 1;
148:   PetscReal     zero = 0.0, one = 1.0, wii, factor;

150:   for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] = 0;
151:   for (i = 0; i < mfqP->n * mfqP->n; i++) mfqP->Hres[i] = 0;

153:   /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */
154:   if (tao->res_weights_v) {
155:     /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */
156:     for (i = 0; i < mfqP->m; i++) {
157:       VecGetValues(tao->res_weights_v, 1, &i, &factor);
158:       factor = factor * mfqP->C[i];
159:       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione));
160:     }

162:     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
163:     /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/
164:     for (i = 0; i < mfqP->m; i++) {
165:       VecGetValues(tao->res_weights_v, 1, &i, &wii);
166:       if (tao->niter > 1) {
167:         factor = wii * mfqP->C[i];
168:         /* add wii * ci * Hi */
169:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
170:       }
171:       /* add wii * gi * gi' */
172:       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn));
173:     }
174:   } else if (tao->res_weights_w) {
175:     /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */
176:     for (i = 0; i < tao->res_weights_n; i++) {
177:       row = tao->res_weights_rows[i];
178:       col = tao->res_weights_cols[i];

180:       factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
181:       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione));
182:       factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
183:       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione));
184:     }

186:     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
187:     /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
188:     for (i = 0; i < tao->res_weights_n; i++) {
189:       row    = tao->res_weights_rows[i];
190:       col    = tao->res_weights_cols[i];
191:       factor = tao->res_weights_w[i] / 2.0;
192:       /* add wij * gi gj' + wij * gj gi' */
193:       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn));
194:       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn));
195:     }
196:     if (tao->niter > 1) {
197:       for (i = 0; i < tao->res_weights_n; i++) {
198:         row = tao->res_weights_rows[i];
199:         col = tao->res_weights_cols[i];

201:         /* add  wij*cj*Hi */
202:         factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
203:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione));

205:         /* add wij*ci*Hj */
206:         factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
207:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione));
208:       }
209:     }
210:   } else {
211:     /* Default: Gres= sum_i[cigi] = G*c' */
212:     PetscInfo(tao, "Identity weights\n");
213:     PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione));

215:     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
216:     /*  Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)}  */
217:     PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn));

219:     /* sum(F(xkin,i)*H(:,:,i)) */
220:     if (tao->niter > 1) {
221:       for (i = 0; i < mfqP->m; i++) {
222:         factor = mfqP->C[i];
223:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
224:       }
225:     }
226:   }
227:   return 0;
228: }

230: static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi)
231: {
232:   /* Phi = .5*[x(1)^2  sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */
233:   PetscInt  i, j, k;
234:   PetscReal sqrt2 = PetscSqrtReal(2.0);

236:   j = 0;
237:   for (i = 0; i < n; i++) {
238:     phi[j] = 0.5 * x[i] * x[i];
239:     j++;
240:     for (k = i + 1; k < n; k++) {
241:       phi[j] = x[i] * x[k] / sqrt2;
242:       j++;
243:     }
244:   }
245:   return 0;
246: }

248: static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP)
249: {
250:   /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x'
251:    that satisfies the interpolation conditions Q(X[:,j]) = f(j)
252:    for j=1,...,m and with a Hessian matrix of least Frobenius norm */

254:   /* NB --we are ignoring c */
255:   PetscInt     i, j, k, num, np = mfqP->nmodelpoints;
256:   PetscReal    one = 1.0, zero = 0.0, negone = -1.0;
257:   PetscBLASInt blasnpmax  = mfqP->npmax;
258:   PetscBLASInt blasnplus1 = mfqP->n + 1;
259:   PetscBLASInt blasnp     = np;
260:   PetscBLASInt blasint    = mfqP->n * (mfqP->n + 1) / 2;
261:   PetscBLASInt blasint2   = np - mfqP->n - 1;
262:   PetscBLASInt info, ione = 1;
263:   PetscReal    sqrt2 = PetscSqrtReal(2.0);

265:   for (i = 0; i < mfqP->n * mfqP->m; i++) mfqP->Gdel[i] = 0;
266:   for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) mfqP->Hdel[i] = 0;

268:   /* factor M */
269:   PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info));

272:   if (np == mfqP->n + 1) {
273:     for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) mfqP->omega[i] = 0.0;
274:     for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->beta[i] = 0.0;
275:   } else {
276:     /* Let Ltmp = (L'*L) */
277:     PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint));

279:     /* factor Ltmp */
280:     PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info));
282:   }

284:   for (k = 0; k < mfqP->m; k++) {
285:     if (np != mfqP->n + 1) {
286:       /* Solve L'*L*Omega = Z' * RESk*/
287:       PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione));
288:       PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info));

291:       /* Beta = L*Omega */
292:       PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione));
293:     }

295:     /* solve M'*Alpha = RESk - N'*Beta */
296:     PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione));
297:     PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info));

300:     /* Gdel(:,k) = Alpha(2:n+1) */
301:     for (i = 0; i < mfqP->n; i++) mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1];

303:     /* Set Hdels */
304:     num = 0;
305:     for (i = 0; i < mfqP->n; i++) {
306:       /* H[i,i,k] = Beta(num) */
307:       mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num];
308:       num++;
309:       for (j = i + 1; j < mfqP->n; j++) {
310:         /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */
311:         mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
312:         mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
313:         num++;
314:       }
315:     }
316:   }
317:   return 0;
318: }

320: static PetscErrorCode morepoints(TAO_POUNDERS *mfqP)
321: {
322:   /* Assumes mfqP->model_indices[0]  is minimum index
323:    Finishes adding points to mfqP->model_indices (up to npmax)
324:    Computes L,Z,M,N
325:    np is actual number of points in model (should equal npmax?) */
326:   PetscInt         point, i, j, offset;
327:   PetscInt         reject;
328:   PetscBLASInt     blasn = mfqP->n, blasnpmax = mfqP->npmax, blasnplus1 = mfqP->n + 1, info, blasnmax = mfqP->nmax, blasint, blasint2, blasnp, blasmaxmn;
329:   const PetscReal *x;
330:   PetscReal        normd;

332:   /* Initialize M,N */
333:   for (i = 0; i < mfqP->n + 1; i++) {
334:     VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x);
335:     mfqP->M[(mfqP->n + 1) * i] = 1.0;
336:     for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
337:     VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x);
338:     phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i]);
339:   }

341:   /* Now we add points until we have npmax starting with the most recent ones */
342:   point              = mfqP->nHist - 1;
343:   mfqP->nmodelpoints = mfqP->n + 1;
344:   while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) {
345:     /* Reject any points already in the model */
346:     reject = 0;
347:     for (j = 0; j < mfqP->n + 1; j++) {
348:       if (point == mfqP->model_indices[j]) {
349:         reject = 1;
350:         break;
351:       }
352:     }

354:     /* Reject if norm(d) >c2 */
355:     if (!reject) {
356:       VecCopy(mfqP->Xhist[point], mfqP->workxvec);
357:       VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex]);
358:       VecNorm(mfqP->workxvec, NORM_2, &normd);
359:       normd /= mfqP->delta;
360:       if (normd > mfqP->c2) reject = 1;
361:     }
362:     if (reject) {
363:       point--;
364:       continue;
365:     }

367:     VecGetArrayRead(mfqP->Xhist[point], &x);
368:     mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0;
369:     for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
370:     VecRestoreArrayRead(mfqP->Xhist[point], &x);
371:     phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)]);

373:     /* Update QR factorization */
374:     /* Copy M' to Q_tmp */
375:     for (i = 0; i < mfqP->n + 1; i++) {
376:       for (j = 0; j < mfqP->npmax; j++) mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j];
377:     }
378:     blasnp = mfqP->nmodelpoints + 1;
379:     /* Q_tmp,R = qr(M') */
380:     blasmaxmn = PetscMax(mfqP->m, mfqP->n + 1);
381:     PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info));

384:     /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */
385:     /* L = N*Qtmp */
386:     blasint2 = mfqP->n * (mfqP->n + 1) / 2;
387:     /* Copy N to L_tmp */
388:     for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) mfqP->L_tmp[i] = mfqP->N[i];
389:     /* Copy L_save to L_tmp */

391:     /* L_tmp = N*Qtmp' */
392:     PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info));

395:     /* Copy L_tmp to L_save */
396:     for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L_save[i] = mfqP->L_tmp[i];

398:     /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */
399:     blasint = mfqP->nmodelpoints - mfqP->n;
400:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
401:     PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info));
402:     PetscFPTrapPop();

405:     if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) {
406:       /* accept point */
407:       mfqP->model_indices[mfqP->nmodelpoints] = point;
408:       /* Copy Q_tmp to Q */
409:       for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q[i] = mfqP->Q_tmp[i];
410:       for (i = 0; i < mfqP->npmax; i++) mfqP->tau[i] = mfqP->tau_tmp[i];
411:       mfqP->nmodelpoints++;
412:       blasnp = mfqP->nmodelpoints;

414:       /* Copy L_save to L */
415:       for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = mfqP->L_save[i];
416:     }
417:     point--;
418:   }

420:   blasnp = mfqP->nmodelpoints;
421:   /* Copy Q(:,n+2:np) to Z */
422:   /* First set Q_tmp to I */
423:   for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q_tmp[i] = 0.0;
424:   for (i = 0; i < mfqP->npmax; i++) mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0;

426:   /* Q_tmp = I * Q */
427:   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));

430:   /* Copy Q_tmp(:,n+2:np) to Z) */
431:   offset = mfqP->npmax * (mfqP->n + 1);
432:   for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Z[i - offset] = mfqP->Q_tmp[i];

434:   if (mfqP->nmodelpoints == mfqP->n + 1) {
435:     /* Set L to I_{n+1} */
436:     for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = 0.0;
437:     for (i = 0; i < mfqP->n; i++) mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0;
438:   }
439:   return 0;
440: }

442: /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */
443: static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index)
444: {
445:   /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/
446:   VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist]);
447:   VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES);
448:   VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]);
449:   VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]);
450:   VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex]);

452:   /* Project into feasible region */
453:   if (tao->XU && tao->XL) VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist]);

455:   /* Compute value of new vector */
456:   VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist]);
457:   CHKMEMQ;
458:   pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]);

460:   /* Add new vector to model */
461:   mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist;
462:   mfqP->nmodelpoints++;
463:   mfqP->nHist++;
464:   return 0;
465: }

467: static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints)
468: {
469:   /* modeld = Q(:,np+1:n)' */
470:   PetscInt     i, j, minindex = 0;
471:   PetscReal    dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY;
472:   PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blask, info;
473:   PetscBLASInt blas1 = 1, blasnmax = mfqP->nmax;

475:   blask = mfqP->nmodelpoints;
476:   /* Qtmp = I(n x n) */
477:   for (i = 0; i < mfqP->n; i++) {
478:     for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0;
479:   }
480:   for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0;

482:   /* Qtmp = Q * I */
483:   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));

485:   for (i = mfqP->nmodelpoints; i < mfqP->n; i++) {
486:     PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1));
487:     if (dp > 0.0) { /* Model says use the other direction! */
488:       for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i * mfqP->npmax + j] *= -1;
489:     }
490:     /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */
491:     for (j = 0; j < mfqP->n; j++) mfqP->work2[j] = mfqP->Gres[j];
492:     PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1));
493:     PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1));
494:     if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) {
495:       minindex = i;
496:       minvalue = mfqP->work[i];
497:     }
498:     if (addallpoints != 0) addpoint(tao, mfqP, i);
499:   }
500:   if (!addallpoints) addpoint(tao, mfqP, minindex);
501:   return 0;
502: }

504: static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c)
505: {
506:   PetscInt         i, j;
507:   PetscBLASInt     blasm = mfqP->m, blasj, blask, blasn = mfqP->n, ione = 1, info;
508:   PetscBLASInt     blasnpmax = mfqP->npmax, blasmaxmn;
509:   PetscReal        proj, normd;
510:   const PetscReal *x;

512:   for (i = mfqP->nHist - 1; i >= 0; i--) {
513:     VecGetArrayRead(mfqP->Xhist[i], &x);
514:     for (j = 0; j < mfqP->n; j++) mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta;
515:     VecRestoreArrayRead(mfqP->Xhist[i], &x);
516:     PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione));
517:     PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione));
518:     if (normd <= c) {
519:       blasj = PetscMax((mfqP->n - mfqP->nmodelpoints), 0);
520:       if (!mfqP->q_is_I) {
521:         /* project D onto null */
522:         blask = (mfqP->nmodelpoints);
523:         PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info));
525:       }
526:       PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione));

528:       if (proj >= mfqP->theta1) { /* add this index to model */
529:         mfqP->model_indices[mfqP->nmodelpoints] = i;
530:         mfqP->nmodelpoints++;
531:         PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione));
532:         blask = mfqP->npmax * (mfqP->nmodelpoints);
533:         PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione));
534:         blask     = mfqP->nmodelpoints;
535:         blasmaxmn = PetscMax(mfqP->m, mfqP->n);
536:         PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info));
538:         mfqP->q_is_I = 0;
539:       }
540:       if (mfqP->nmodelpoints == mfqP->n) break;
541:     }
542:   }

544:   return 0;
545: }

547: static PetscErrorCode TaoSolve_POUNDERS(Tao tao)
548: {
549:   TAO_POUNDERS    *mfqP = (TAO_POUNDERS *)tao->data;
550:   PetscInt         i, ii, j, k, l;
551:   PetscReal        step = 1.0;
552:   PetscInt         low, high;
553:   PetscReal        minnorm;
554:   PetscReal       *x, *f;
555:   const PetscReal *xmint, *fmin;
556:   PetscReal        deltaold;
557:   PetscReal        gnorm;
558:   PetscBLASInt     info, ione = 1, iblas;
559:   PetscBool        valid, same;
560:   PetscReal        mdec, rho, normxsp;
561:   PetscReal        one = 1.0, zero = 0.0, ratio;
562:   PetscBLASInt     blasm, blasn, blasncopy, blasnpmax;
563:   static PetscBool set = PETSC_FALSE;

565:   /* n = # of parameters
566:      m = dimension (components) of function  */
567:   PetscCall(PetscCitationsRegister("@article{UNEDF0,\n"
568:                                    "title = {Nuclear energy density optimization},\n"
569:                                    "author = {Kortelainen, M.  and Lesinski, T.  and Mor\'e, J.  and Nazarewicz, W.\n"
570:                                    "          and Sarich, J.  and Schunck, N.  and Stoitsov, M. V. and Wild, S. },\n"
571:                                    "journal = {Phys. Rev. C},\n"
572:                                    "volume = {82},\n"
573:                                    "number = {2},\n"
574:                                    "pages = {024313},\n"
575:                                    "numpages = {18},\n"
576:                                    "year = {2010},\n"
577:                                    "month = {Aug},\n"
578:                                    "doi = {10.1103/PhysRevC.82.024313}\n}\n",
579:                                    &set));
580:   tao->niter = 0;
581:   if (tao->XL && tao->XU) {
582:     /* Check x0 <= XU */
583:     PetscReal val;

585:     VecCopy(tao->solution, mfqP->Xhist[0]);
586:     VecAXPY(mfqP->Xhist[0], -1.0, tao->XU);
587:     VecMax(mfqP->Xhist[0], NULL, &val);

590:     /* Check x0 >= xl */
591:     VecCopy(tao->XL, mfqP->Xhist[0]);
592:     VecAXPY(mfqP->Xhist[0], -1.0, tao->solution);
593:     VecMax(mfqP->Xhist[0], NULL, &val);

596:     /* Check x0 + delta < XU  -- should be able to get around this eventually */

598:     VecSet(mfqP->Xhist[0], mfqP->delta);
599:     VecAXPY(mfqP->Xhist[0], 1.0, tao->solution);
600:     VecAXPY(mfqP->Xhist[0], -1.0, tao->XU);
601:     VecMax(mfqP->Xhist[0], NULL, &val);
603:   }

605:   blasm     = mfqP->m;
606:   blasn     = mfqP->n;
607:   blasnpmax = mfqP->npmax;
608:   for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0;

610:   VecCopy(tao->solution, mfqP->Xhist[0]);

612:   /* This provides enough information to approximate the gradient of the objective */
613:   /* using a forward difference scheme. */

615:   PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta);
616:   pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0]);
617:   mfqP->minindex = 0;
618:   minnorm        = mfqP->Fres[0];

620:   VecGetOwnershipRange(mfqP->Xhist[0], &low, &high);
621:   for (i = 1; i < mfqP->n + 1; ++i) {
622:     VecCopy(mfqP->Xhist[0], mfqP->Xhist[i]);

624:     if (i - 1 >= low && i - 1 < high) {
625:       VecGetArray(mfqP->Xhist[i], &x);
626:       x[i - 1 - low] += mfqP->delta;
627:       VecRestoreArray(mfqP->Xhist[i], &x);
628:     }
629:     CHKMEMQ;
630:     pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i]);
631:     if (mfqP->Fres[i] < minnorm) {
632:       mfqP->minindex = i;
633:       minnorm        = mfqP->Fres[i];
634:     }
635:   }
636:   VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution);
637:   VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res);
638:   PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm);

640:   /* Gather mpi vecs to one big local vec */

642:   /* Begin serial code */

644:   /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
645:   /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
646:   /* (Column oriented for blas calls) */
647:   ii = 0;

649:   PetscInfo(tao, "Build matrix: %" PetscInt_FMT "\n", (PetscInt)mfqP->size);
650:   if (1 == mfqP->size) {
651:     VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint);
652:     for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
653:     VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint);
654:     VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin);
655:     for (i = 0; i < mfqP->n + 1; i++) {
656:       if (i == mfqP->minindex) continue;

658:       VecGetArray(mfqP->Xhist[i], &x);
659:       for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
660:       VecRestoreArray(mfqP->Xhist[i], &x);

662:       VecGetArray(mfqP->Fhist[i], &f);
663:       for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
664:       VecRestoreArray(mfqP->Fhist[i], &f);

666:       mfqP->model_indices[ii++] = i;
667:     }
668:     for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
669:     VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin);
670:   } else {
671:     VecSet(mfqP->localxmin, 0);
672:     VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD);
673:     VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD);

675:     VecGetArrayRead(mfqP->localxmin, &xmint);
676:     for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
677:     VecRestoreArrayRead(mfqP->localxmin, &xmint);

679:     VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD);
680:     VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD);
681:     VecGetArrayRead(mfqP->localfmin, &fmin);
682:     for (i = 0; i < mfqP->n + 1; i++) {
683:       if (i == mfqP->minindex) continue;

685:       VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD);
686:       VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD);
687:       VecGetArray(mfqP->localx, &x);
688:       for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
689:       VecRestoreArray(mfqP->localx, &x);

691:       VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD);
692:       VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD);
693:       VecGetArray(mfqP->localf, &f);
694:       for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
695:       VecRestoreArray(mfqP->localf, &f);

697:       mfqP->model_indices[ii++] = i;
698:     }
699:     for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
700:     VecRestoreArrayRead(mfqP->localfmin, &fmin);
701:   }

703:   /* Determine the initial quadratic models */
704:   /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */
705:   /* D (nxn) Fdiff (nxm)  => G (nxm) */
706:   blasncopy = blasn;
707:   PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info));
708:   PetscInfo(tao, "Linear solve return: %" PetscInt_FMT "\n", (PetscInt)info);

710:   pounders_update_res(tao);

712:   valid = PETSC_TRUE;

714:   VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES);
715:   VecAssemblyBegin(tao->gradient);
716:   VecAssemblyEnd(tao->gradient);
717:   VecNorm(tao->gradient, NORM_2, &gnorm);
718:   gnorm *= mfqP->delta;
719:   VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution);

721:   tao->reason = TAO_CONTINUE_ITERATING;
722:   TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its);
723:   TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step);
724:   PetscUseTypeMethod(tao, convergencetest, tao->cnvP);

726:   mfqP->nHist        = mfqP->n + 1;
727:   mfqP->nmodelpoints = mfqP->n + 1;
728:   PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm);

730:   while (tao->reason == TAO_CONTINUE_ITERATING) {
731:     PetscReal gnm = 1e-4;
732:     /* Call general purpose update function */
733:     PetscTryTypeMethod(tao, update, tao->niter, tao->user_update);
734:     tao->niter++;
735:     /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */
736:     gqtwrap(tao, &gnm, &mdec);
737:     /* Evaluate the function at the new point */

739:     for (i = 0; i < mfqP->n; i++) mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i];
740:     VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist]);
741:     VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist]);
742:     VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES);
743:     VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]);
744:     VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]);

746:     pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]);

748:     rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec;
749:     mfqP->nHist++;

751:     /* Update the center */
752:     if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) {
753:       /* Update model to reflect new base point */
754:       for (i = 0; i < mfqP->n; i++) mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta;
755:       for (j = 0; j < mfqP->m; j++) {
756:         /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work';
757:          G(:,j) = G(:,j) + H(:,:,j)*work' */
758:         for (k = 0; k < mfqP->n; k++) {
759:           mfqP->work2[k] = 0.0;
760:           for (l = 0; l < mfqP->n; l++) mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l];
761:         }
762:         for (i = 0; i < mfqP->n; i++) {
763:           mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]);
764:           mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i];
765:         }
766:       }
767:       /* Cres += work*Gres + .5*work*Hres*work';
768:        Gres += Hres*work'; */

770:       PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione));
771:       for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] += mfqP->work2[i];
772:       mfqP->minindex = mfqP->nHist - 1;
773:       minnorm        = mfqP->Fres[mfqP->minindex];
774:       VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res);
775:       /* Change current center */
776:       VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint);
777:       for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
778:       VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint);
779:     }

781:     /* Evaluate at a model-improving point if necessary */
782:     if (valid == PETSC_FALSE) {
783:       mfqP->q_is_I       = 1;
784:       mfqP->nmodelpoints = 0;
785:       affpoints(mfqP, mfqP->xmin, mfqP->c1);
786:       if (mfqP->nmodelpoints < mfqP->n) {
787:         PetscInfo(tao, "Model not valid -- model-improving\n");
788:         modelimprove(tao, mfqP, 1);
789:       }
790:     }

792:     /* Update the trust region radius */
793:     deltaold = mfqP->delta;
794:     normxsp  = 0;
795:     for (i = 0; i < mfqP->n; i++) normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i];
796:     normxsp = PetscSqrtReal(normxsp);
797:     if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) {
798:       mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax);
799:     } else if (valid == PETSC_TRUE) {
800:       mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin);
801:     }

803:     /* Compute the next interpolation set */
804:     mfqP->q_is_I       = 1;
805:     mfqP->nmodelpoints = 0;
806:     PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1);
807:     affpoints(mfqP, mfqP->xmin, mfqP->c1);
808:     if (mfqP->nmodelpoints == mfqP->n) {
809:       valid = PETSC_TRUE;
810:     } else {
811:       valid = PETSC_FALSE;
812:       PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2);
813:       affpoints(mfqP, mfqP->xmin, mfqP->c2);
814:       if (mfqP->n > mfqP->nmodelpoints) {
815:         PetscInfo(tao, "Model not valid -- adding geometry points\n");
816:         modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints);
817:       }
818:     }
819:     for (i = mfqP->nmodelpoints; i > 0; i--) mfqP->model_indices[i] = mfqP->model_indices[i - 1];
820:     mfqP->nmodelpoints++;
821:     mfqP->model_indices[0] = mfqP->minindex;
822:     morepoints(mfqP);
823:     for (i = 0; i < mfqP->nmodelpoints; i++) {
824:       VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x);
825:       for (j = 0; j < mfqP->n; j++) mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold;
826:       VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x);
827:       VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f);
828:       for (j = 0; j < mfqP->m; j++) {
829:         for (k = 0; k < mfqP->n; k++) {
830:           mfqP->work[k] = 0.0;
831:           for (l = 0; l < mfqP->n; l++) mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l];
832:         }
833:         PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]);
834:       }
835:       VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f);
836:     }

838:     /* Update the quadratic model */
839:     PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints);
840:     getquadpounders(mfqP);
841:     VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin);
842:     PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione));
843:     /* G = G*(delta/deltaold) + Gdel */
844:     ratio = mfqP->delta / deltaold;
845:     iblas = blasm * blasn;
846:     PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione));
847:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione));
848:     /* H = H*(delta/deltaold)^2 + Hdel */
849:     iblas = blasm * blasn * blasn;
850:     ratio *= ratio;
851:     PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione));
852:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione));

854:     /* Get residuals */
855:     pounders_update_res(tao);

857:     /* Export solution and gradient residual to TAO */
858:     VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution);
859:     VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES);
860:     VecAssemblyBegin(tao->gradient);
861:     VecAssemblyEnd(tao->gradient);
862:     VecNorm(tao->gradient, NORM_2, &gnorm);
863:     gnorm *= mfqP->delta;
864:     /*  final criticality test */
865:     TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its);
866:     TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step);
867:     PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
868:     /* test for repeated model */
869:     if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) {
870:       same = PETSC_TRUE;
871:     } else {
872:       same = PETSC_FALSE;
873:     }
874:     for (i = 0; i < mfqP->nmodelpoints; i++) {
875:       if (same) {
876:         if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) {
877:           same = PETSC_TRUE;
878:         } else {
879:           same = PETSC_FALSE;
880:         }
881:       }
882:       mfqP->last_model_indices[i] = mfqP->model_indices[i];
883:     }
884:     mfqP->last_nmodelpoints = mfqP->nmodelpoints;
885:     if (same && mfqP->delta == deltaold) {
886:       PetscInfo(tao, "Identical model used in successive iterations\n");
887:       tao->reason = TAO_CONVERGED_STEPTOL;
888:     }
889:   }
890:   return 0;
891: }

893: static PetscErrorCode TaoSetUp_POUNDERS(Tao tao)
894: {
895:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
896:   PetscInt      i, j;
897:   IS            isfloc, isfglob, isxloc, isxglob;

899:   if (!tao->gradient) VecDuplicate(tao->solution, &tao->gradient);
900:   if (!tao->stepdirection) VecDuplicate(tao->solution, &tao->stepdirection);
901:   VecGetSize(tao->solution, &mfqP->n);
902:   VecGetSize(tao->ls_res, &mfqP->m);
903:   mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n);
904:   if (mfqP->npmax == PETSC_DEFAULT) mfqP->npmax = 2 * mfqP->n + 1;
905:   mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax);
906:   mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2);

908:   PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist);
909:   PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist);
910:   for (i = 0; i < mfqP->n + 1; i++) {
911:     VecDuplicate(tao->solution, &mfqP->Xhist[i]);
912:     VecDuplicate(tao->ls_res, &mfqP->Fhist[i]);
913:   }
914:   VecDuplicate(tao->solution, &mfqP->workxvec);
915:   VecDuplicate(tao->ls_res, &mfqP->workfvec);
916:   mfqP->nHist = 0;

918:   PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres);
919:   PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES);
920:   PetscMalloc1(mfqP->n, &mfqP->work);
921:   PetscMalloc1(mfqP->n, &mfqP->work2);
922:   PetscMalloc1(mfqP->n, &mfqP->work3);
923:   PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork);
924:   PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega);
925:   PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta);
926:   PetscMalloc1(mfqP->n + 1, &mfqP->alpha);

928:   PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H);
929:   PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q);
930:   PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp);
931:   PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L);
932:   PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp);
933:   PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save);
934:   PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N);
935:   PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M);
936:   PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z);
937:   PetscMalloc1(mfqP->npmax, &mfqP->tau);
938:   PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp);
939:   mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2);
940:   PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork);
941:   PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork);
942:   PetscMalloc1(mfqP->n, &mfqP->xmin);
943:   PetscMalloc1(mfqP->m, &mfqP->C);
944:   PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff);
945:   PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp);
946:   PetscMalloc1(mfqP->n, &mfqP->Gres);
947:   PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres);
948:   PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints);
949:   PetscMalloc1(mfqP->npmax, &mfqP->model_indices);
950:   PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices);
951:   PetscMalloc1(mfqP->n, &mfqP->Xsubproblem);
952:   PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel);
953:   PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel);
954:   PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices);
955:   PetscMalloc1(mfqP->n, &mfqP->iwork);
956:   PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w);
957:   for (i = 0; i < mfqP->m; i++) {
958:     for (j = 0; j < mfqP->m; j++) {
959:       if (i == j) {
960:         mfqP->w[i + mfqP->m * j] = 1.0;
961:       } else {
962:         mfqP->w[i + mfqP->m * j] = 0.0;
963:       }
964:     }
965:   }
966:   for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) mfqP->indices[i] = i;
967:   MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size);
968:   if (mfqP->size > 1) {
969:     VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx);
970:     VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin);
971:     VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf);
972:     VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin);
973:     ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc);
974:     ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob);
975:     ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc);
976:     ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob);

978:     VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx);
979:     VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf);

981:     ISDestroy(&isxloc);
982:     ISDestroy(&isxglob);
983:     ISDestroy(&isfloc);
984:     ISDestroy(&isfglob);
985:   }

987:   if (!mfqP->usegqt) {
988:     KSP ksp;
989:     PC  pc;
990:     VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx);
991:     VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl);
992:     VecDuplicate(mfqP->subxl, &mfqP->subb);
993:     VecDuplicate(mfqP->subxl, &mfqP->subxu);
994:     VecDuplicate(mfqP->subxl, &mfqP->subpdel);
995:     VecDuplicate(mfqP->subxl, &mfqP->subndel);
996:     TaoCreate(PETSC_COMM_SELF, &mfqP->subtao);
997:     PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1);
998:     TaoSetType(mfqP->subtao, TAOBNTR);
999:     TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_");
1000:     TaoSetSolution(mfqP->subtao, mfqP->subx);
1001:     TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP);
1002:     TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits);
1003:     TaoSetFromOptions(mfqP->subtao);
1004:     TaoGetKSP(mfqP->subtao, &ksp);
1005:     if (ksp) {
1006:       KSPGetPC(ksp, &pc);
1007:       PCSetType(pc, PCNONE);
1008:     }
1009:     TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu);
1010:     MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH);
1011:     TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP);
1012:   }
1013:   return 0;
1014: }

1016: static PetscErrorCode TaoDestroy_POUNDERS(Tao tao)
1017: {
1018:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1019:   PetscInt      i;

1021:   if (!mfqP->usegqt) {
1022:     TaoDestroy(&mfqP->subtao);
1023:     VecDestroy(&mfqP->subx);
1024:     VecDestroy(&mfqP->subxl);
1025:     VecDestroy(&mfqP->subxu);
1026:     VecDestroy(&mfqP->subb);
1027:     VecDestroy(&mfqP->subpdel);
1028:     VecDestroy(&mfqP->subndel);
1029:     MatDestroy(&mfqP->subH);
1030:   }
1031:   PetscFree(mfqP->Fres);
1032:   PetscFree(mfqP->RES);
1033:   PetscFree(mfqP->work);
1034:   PetscFree(mfqP->work2);
1035:   PetscFree(mfqP->work3);
1036:   PetscFree(mfqP->mwork);
1037:   PetscFree(mfqP->omega);
1038:   PetscFree(mfqP->beta);
1039:   PetscFree(mfqP->alpha);
1040:   PetscFree(mfqP->H);
1041:   PetscFree(mfqP->Q);
1042:   PetscFree(mfqP->Q_tmp);
1043:   PetscFree(mfqP->L);
1044:   PetscFree(mfqP->L_tmp);
1045:   PetscFree(mfqP->L_save);
1046:   PetscFree(mfqP->N);
1047:   PetscFree(mfqP->M);
1048:   PetscFree(mfqP->Z);
1049:   PetscFree(mfqP->tau);
1050:   PetscFree(mfqP->tau_tmp);
1051:   PetscFree(mfqP->npmaxwork);
1052:   PetscFree(mfqP->npmaxiwork);
1053:   PetscFree(mfqP->xmin);
1054:   PetscFree(mfqP->C);
1055:   PetscFree(mfqP->Fdiff);
1056:   PetscFree(mfqP->Disp);
1057:   PetscFree(mfqP->Gres);
1058:   PetscFree(mfqP->Hres);
1059:   PetscFree(mfqP->Gpoints);
1060:   PetscFree(mfqP->model_indices);
1061:   PetscFree(mfqP->last_model_indices);
1062:   PetscFree(mfqP->Xsubproblem);
1063:   PetscFree(mfqP->Gdel);
1064:   PetscFree(mfqP->Hdel);
1065:   PetscFree(mfqP->indices);
1066:   PetscFree(mfqP->iwork);
1067:   PetscFree(mfqP->w);
1068:   for (i = 0; i < mfqP->nHist; i++) {
1069:     VecDestroy(&mfqP->Xhist[i]);
1070:     VecDestroy(&mfqP->Fhist[i]);
1071:   }
1072:   VecDestroy(&mfqP->workxvec);
1073:   VecDestroy(&mfqP->workfvec);
1074:   PetscFree(mfqP->Xhist);
1075:   PetscFree(mfqP->Fhist);

1077:   if (mfqP->size > 1) {
1078:     VecDestroy(&mfqP->localx);
1079:     VecDestroy(&mfqP->localxmin);
1080:     VecDestroy(&mfqP->localf);
1081:     VecDestroy(&mfqP->localfmin);
1082:   }
1083:   PetscFree(tao->data);
1084:   return 0;
1085: }

1087: static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems *PetscOptionsObject)
1088: {
1089:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;

1091:   PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization");
1092:   PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL);
1093:   mfqP->delta = mfqP->delta0;
1094:   PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL);
1095:   PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL);
1096:   PetscOptionsHeadEnd();
1097:   return 0;
1098: }

1100: static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer)
1101: {
1102:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1103:   PetscBool     isascii;

1105:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
1106:   if (isascii) {
1107:     PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0);
1108:     PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta);
1109:     PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints);
1110:     if (mfqP->usegqt) {
1111:       PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n");
1112:     } else {
1113:       TaoView(mfqP->subtao, viewer);
1114:     }
1115:   }
1116:   return 0;
1117: }
1118: /*MC
1119:   TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares

1121:   Options Database Keys:
1122: + -tao_pounders_delta - initial step length
1123: . -tao_pounders_npmax - maximum number of points in model
1124: - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON

1126:   Level: beginner

1128: M*/

1130: PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao)
1131: {
1132:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;

1134:   tao->ops->setup          = TaoSetUp_POUNDERS;
1135:   tao->ops->solve          = TaoSolve_POUNDERS;
1136:   tao->ops->view           = TaoView_POUNDERS;
1137:   tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS;
1138:   tao->ops->destroy        = TaoDestroy_POUNDERS;

1140:   PetscNew(&mfqP);
1141:   tao->data = (void *)mfqP;
1142:   /* Override default settings (unless already changed) */
1143:   if (!tao->max_it_changed) tao->max_it = 2000;
1144:   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
1145:   mfqP->npmax      = PETSC_DEFAULT;
1146:   mfqP->delta0     = 0.1;
1147:   mfqP->delta      = 0.1;
1148:   mfqP->deltamax   = 1e3;
1149:   mfqP->deltamin   = 1e-6;
1150:   mfqP->c2         = 10.0;
1151:   mfqP->theta1     = 1e-5;
1152:   mfqP->theta2     = 1e-4;
1153:   mfqP->gamma0     = 0.5;
1154:   mfqP->gamma1     = 2.0;
1155:   mfqP->eta0       = 0.0;
1156:   mfqP->eta1       = 0.1;
1157:   mfqP->usegqt     = PETSC_FALSE;
1158:   mfqP->gqt_rtol   = 0.001;
1159:   mfqP->gqt_maxits = 50;
1160:   mfqP->workxvec   = NULL;
1161:   return 0;
1162: }