Actual source code: pounders.c
1: #include <../src/tao/leastsquares/impls/pounders/pounders.h>
3: static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, void *ctx)
4: {
5: return 0;
6: }
8: static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, void *ctx)
9: {
10: TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx;
11: PetscReal d1, d2;
13: /* g = A*x (add b later)*/
14: MatMult(mfqP->subH, x, g);
16: /* f = 1/2 * x'*(Ax) + b'*x */
17: VecDot(x, g, &d1);
18: VecDot(mfqP->subb, x, &d2);
19: *f = 0.5 * d1 + d2;
21: /* now g = g + b */
22: VecAXPY(g, 1.0, mfqP->subb);
23: return 0;
24: }
26: static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum)
27: {
28: TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
29: PetscInt i, row, col;
30: PetscReal fr, fc;
32: TaoComputeResidual(tao, x, F);
33: if (tao->res_weights_v) {
34: VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F);
35: VecDot(mfqP->workfvec, mfqP->workfvec, fsum);
36: } else if (tao->res_weights_w) {
37: *fsum = 0;
38: for (i = 0; i < tao->res_weights_n; i++) {
39: row = tao->res_weights_rows[i];
40: col = tao->res_weights_cols[i];
41: VecGetValues(F, 1, &row, &fr);
42: VecGetValues(F, 1, &col, &fc);
43: *fsum += tao->res_weights_w[i] * fc * fr;
44: }
45: } else {
46: VecDot(F, F, fsum);
47: }
48: PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum);
50: return 0;
51: }
53: static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin)
54: {
55: #if defined(PETSC_USE_REAL_SINGLE)
56: PetscReal atol = 1.0e-5;
57: #else
58: PetscReal atol = 1.0e-10;
59: #endif
60: PetscInt info, its;
61: TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
63: if (!mfqP->usegqt) {
64: PetscReal maxval;
65: PetscInt i, j;
67: VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES);
68: VecAssemblyBegin(mfqP->subb);
69: VecAssemblyEnd(mfqP->subb);
71: VecSet(mfqP->subx, 0.0);
73: VecSet(mfqP->subndel, -1.0);
74: VecSet(mfqP->subpdel, +1.0);
76: /* Complete the lower triangle of the Hessian matrix */
77: for (i = 0; i < mfqP->n; i++) {
78: for (j = i + 1; j < mfqP->n; j++) mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i];
79: }
80: MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES);
81: MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY);
82: MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY);
84: TaoResetStatistics(mfqP->subtao);
85: /* TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_DEFAULT); */
86: /* enforce bound constraints -- experimental */
87: if (tao->XU && tao->XL) {
88: VecCopy(tao->XU, mfqP->subxu);
89: VecAXPY(mfqP->subxu, -1.0, tao->solution);
90: VecScale(mfqP->subxu, 1.0 / mfqP->delta);
91: VecCopy(tao->XL, mfqP->subxl);
92: VecAXPY(mfqP->subxl, -1.0, tao->solution);
93: VecScale(mfqP->subxl, 1.0 / mfqP->delta);
95: VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel);
96: VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel);
97: } else {
98: VecCopy(mfqP->subpdel, mfqP->subxu);
99: VecCopy(mfqP->subndel, mfqP->subxl);
100: }
101: /* Make sure xu > xl */
102: VecCopy(mfqP->subxl, mfqP->subpdel);
103: VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu);
104: VecMax(mfqP->subpdel, NULL, &maxval);
106: /* Make sure xu > tao->solution > xl */
107: VecCopy(mfqP->subxl, mfqP->subpdel);
108: VecAXPY(mfqP->subpdel, -1.0, mfqP->subx);
109: VecMax(mfqP->subpdel, NULL, &maxval);
112: VecCopy(mfqP->subx, mfqP->subpdel);
113: VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu);
114: VecMax(mfqP->subpdel, NULL, &maxval);
117: TaoSolve(mfqP->subtao);
118: TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL);
120: /* test bounds post-solution*/
121: VecCopy(mfqP->subxl, mfqP->subpdel);
122: VecAXPY(mfqP->subpdel, -1.0, mfqP->subx);
123: VecMax(mfqP->subpdel, NULL, &maxval);
124: if (maxval > 1e-5) {
125: PetscInfo(tao, "subproblem solution < lower bound\n");
126: tao->reason = TAO_DIVERGED_TR_REDUCTION;
127: }
129: VecCopy(mfqP->subx, mfqP->subpdel);
130: VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu);
131: VecMax(mfqP->subpdel, NULL, &maxval);
132: if (maxval > 1e-5) {
133: PetscInfo(tao, "subproblem solution > upper bound\n");
134: tao->reason = TAO_DIVERGED_TR_REDUCTION;
135: }
136: } else {
137: gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3);
138: }
139: *qmin *= -1;
140: return 0;
141: }
143: static PetscErrorCode pounders_update_res(Tao tao)
144: {
145: TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
146: PetscInt i, row, col;
147: PetscBLASInt blasn = mfqP->n, blasn2 = blasn * blasn, blasm = mfqP->m, ione = 1;
148: PetscReal zero = 0.0, one = 1.0, wii, factor;
150: for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] = 0;
151: for (i = 0; i < mfqP->n * mfqP->n; i++) mfqP->Hres[i] = 0;
153: /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */
154: if (tao->res_weights_v) {
155: /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */
156: for (i = 0; i < mfqP->m; i++) {
157: VecGetValues(tao->res_weights_v, 1, &i, &factor);
158: factor = factor * mfqP->C[i];
159: PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione));
160: }
162: /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
163: /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/
164: for (i = 0; i < mfqP->m; i++) {
165: VecGetValues(tao->res_weights_v, 1, &i, &wii);
166: if (tao->niter > 1) {
167: factor = wii * mfqP->C[i];
168: /* add wii * ci * Hi */
169: PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
170: }
171: /* add wii * gi * gi' */
172: PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn));
173: }
174: } else if (tao->res_weights_w) {
175: /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */
176: for (i = 0; i < tao->res_weights_n; i++) {
177: row = tao->res_weights_rows[i];
178: col = tao->res_weights_cols[i];
180: factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
181: PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione));
182: factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
183: PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione));
184: }
186: /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
187: /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
188: for (i = 0; i < tao->res_weights_n; i++) {
189: row = tao->res_weights_rows[i];
190: col = tao->res_weights_cols[i];
191: factor = tao->res_weights_w[i] / 2.0;
192: /* add wij * gi gj' + wij * gj gi' */
193: PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn));
194: PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn));
195: }
196: if (tao->niter > 1) {
197: for (i = 0; i < tao->res_weights_n; i++) {
198: row = tao->res_weights_rows[i];
199: col = tao->res_weights_cols[i];
201: /* add wij*cj*Hi */
202: factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
203: PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione));
205: /* add wij*ci*Hj */
206: factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
207: PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione));
208: }
209: }
210: } else {
211: /* Default: Gres= sum_i[cigi] = G*c' */
212: PetscInfo(tao, "Identity weights\n");
213: PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione));
215: /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
216: /* Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)} */
217: PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn));
219: /* sum(F(xkin,i)*H(:,:,i)) */
220: if (tao->niter > 1) {
221: for (i = 0; i < mfqP->m; i++) {
222: factor = mfqP->C[i];
223: PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
224: }
225: }
226: }
227: return 0;
228: }
230: static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi)
231: {
232: /* Phi = .5*[x(1)^2 sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */
233: PetscInt i, j, k;
234: PetscReal sqrt2 = PetscSqrtReal(2.0);
236: j = 0;
237: for (i = 0; i < n; i++) {
238: phi[j] = 0.5 * x[i] * x[i];
239: j++;
240: for (k = i + 1; k < n; k++) {
241: phi[j] = x[i] * x[k] / sqrt2;
242: j++;
243: }
244: }
245: return 0;
246: }
248: static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP)
249: {
250: /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x'
251: that satisfies the interpolation conditions Q(X[:,j]) = f(j)
252: for j=1,...,m and with a Hessian matrix of least Frobenius norm */
254: /* NB --we are ignoring c */
255: PetscInt i, j, k, num, np = mfqP->nmodelpoints;
256: PetscReal one = 1.0, zero = 0.0, negone = -1.0;
257: PetscBLASInt blasnpmax = mfqP->npmax;
258: PetscBLASInt blasnplus1 = mfqP->n + 1;
259: PetscBLASInt blasnp = np;
260: PetscBLASInt blasint = mfqP->n * (mfqP->n + 1) / 2;
261: PetscBLASInt blasint2 = np - mfqP->n - 1;
262: PetscBLASInt info, ione = 1;
263: PetscReal sqrt2 = PetscSqrtReal(2.0);
265: for (i = 0; i < mfqP->n * mfqP->m; i++) mfqP->Gdel[i] = 0;
266: for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) mfqP->Hdel[i] = 0;
268: /* factor M */
269: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info));
272: if (np == mfqP->n + 1) {
273: for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) mfqP->omega[i] = 0.0;
274: for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->beta[i] = 0.0;
275: } else {
276: /* Let Ltmp = (L'*L) */
277: PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint));
279: /* factor Ltmp */
280: PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info));
282: }
284: for (k = 0; k < mfqP->m; k++) {
285: if (np != mfqP->n + 1) {
286: /* Solve L'*L*Omega = Z' * RESk*/
287: PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione));
288: PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info));
291: /* Beta = L*Omega */
292: PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione));
293: }
295: /* solve M'*Alpha = RESk - N'*Beta */
296: PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione));
297: PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info));
300: /* Gdel(:,k) = Alpha(2:n+1) */
301: for (i = 0; i < mfqP->n; i++) mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1];
303: /* Set Hdels */
304: num = 0;
305: for (i = 0; i < mfqP->n; i++) {
306: /* H[i,i,k] = Beta(num) */
307: mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num];
308: num++;
309: for (j = i + 1; j < mfqP->n; j++) {
310: /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */
311: mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
312: mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
313: num++;
314: }
315: }
316: }
317: return 0;
318: }
320: static PetscErrorCode morepoints(TAO_POUNDERS *mfqP)
321: {
322: /* Assumes mfqP->model_indices[0] is minimum index
323: Finishes adding points to mfqP->model_indices (up to npmax)
324: Computes L,Z,M,N
325: np is actual number of points in model (should equal npmax?) */
326: PetscInt point, i, j, offset;
327: PetscInt reject;
328: PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blasnplus1 = mfqP->n + 1, info, blasnmax = mfqP->nmax, blasint, blasint2, blasnp, blasmaxmn;
329: const PetscReal *x;
330: PetscReal normd;
332: /* Initialize M,N */
333: for (i = 0; i < mfqP->n + 1; i++) {
334: VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x);
335: mfqP->M[(mfqP->n + 1) * i] = 1.0;
336: for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
337: VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x);
338: phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i]);
339: }
341: /* Now we add points until we have npmax starting with the most recent ones */
342: point = mfqP->nHist - 1;
343: mfqP->nmodelpoints = mfqP->n + 1;
344: while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) {
345: /* Reject any points already in the model */
346: reject = 0;
347: for (j = 0; j < mfqP->n + 1; j++) {
348: if (point == mfqP->model_indices[j]) {
349: reject = 1;
350: break;
351: }
352: }
354: /* Reject if norm(d) >c2 */
355: if (!reject) {
356: VecCopy(mfqP->Xhist[point], mfqP->workxvec);
357: VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex]);
358: VecNorm(mfqP->workxvec, NORM_2, &normd);
359: normd /= mfqP->delta;
360: if (normd > mfqP->c2) reject = 1;
361: }
362: if (reject) {
363: point--;
364: continue;
365: }
367: VecGetArrayRead(mfqP->Xhist[point], &x);
368: mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0;
369: for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
370: VecRestoreArrayRead(mfqP->Xhist[point], &x);
371: phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)]);
373: /* Update QR factorization */
374: /* Copy M' to Q_tmp */
375: for (i = 0; i < mfqP->n + 1; i++) {
376: for (j = 0; j < mfqP->npmax; j++) mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j];
377: }
378: blasnp = mfqP->nmodelpoints + 1;
379: /* Q_tmp,R = qr(M') */
380: blasmaxmn = PetscMax(mfqP->m, mfqP->n + 1);
381: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info));
384: /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */
385: /* L = N*Qtmp */
386: blasint2 = mfqP->n * (mfqP->n + 1) / 2;
387: /* Copy N to L_tmp */
388: for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) mfqP->L_tmp[i] = mfqP->N[i];
389: /* Copy L_save to L_tmp */
391: /* L_tmp = N*Qtmp' */
392: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info));
395: /* Copy L_tmp to L_save */
396: for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L_save[i] = mfqP->L_tmp[i];
398: /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */
399: blasint = mfqP->nmodelpoints - mfqP->n;
400: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
401: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info));
402: PetscFPTrapPop();
405: if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) {
406: /* accept point */
407: mfqP->model_indices[mfqP->nmodelpoints] = point;
408: /* Copy Q_tmp to Q */
409: for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q[i] = mfqP->Q_tmp[i];
410: for (i = 0; i < mfqP->npmax; i++) mfqP->tau[i] = mfqP->tau_tmp[i];
411: mfqP->nmodelpoints++;
412: blasnp = mfqP->nmodelpoints;
414: /* Copy L_save to L */
415: for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = mfqP->L_save[i];
416: }
417: point--;
418: }
420: blasnp = mfqP->nmodelpoints;
421: /* Copy Q(:,n+2:np) to Z */
422: /* First set Q_tmp to I */
423: for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q_tmp[i] = 0.0;
424: for (i = 0; i < mfqP->npmax; i++) mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0;
426: /* Q_tmp = I * Q */
427: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));
430: /* Copy Q_tmp(:,n+2:np) to Z) */
431: offset = mfqP->npmax * (mfqP->n + 1);
432: for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Z[i - offset] = mfqP->Q_tmp[i];
434: if (mfqP->nmodelpoints == mfqP->n + 1) {
435: /* Set L to I_{n+1} */
436: for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = 0.0;
437: for (i = 0; i < mfqP->n; i++) mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0;
438: }
439: return 0;
440: }
442: /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */
443: static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index)
444: {
445: /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/
446: VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist]);
447: VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES);
448: VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]);
449: VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]);
450: VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex]);
452: /* Project into feasible region */
453: if (tao->XU && tao->XL) VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist]);
455: /* Compute value of new vector */
456: VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist]);
457: CHKMEMQ;
458: pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]);
460: /* Add new vector to model */
461: mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist;
462: mfqP->nmodelpoints++;
463: mfqP->nHist++;
464: return 0;
465: }
467: static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints)
468: {
469: /* modeld = Q(:,np+1:n)' */
470: PetscInt i, j, minindex = 0;
471: PetscReal dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY;
472: PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blask, info;
473: PetscBLASInt blas1 = 1, blasnmax = mfqP->nmax;
475: blask = mfqP->nmodelpoints;
476: /* Qtmp = I(n x n) */
477: for (i = 0; i < mfqP->n; i++) {
478: for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0;
479: }
480: for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0;
482: /* Qtmp = Q * I */
483: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));
485: for (i = mfqP->nmodelpoints; i < mfqP->n; i++) {
486: PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1));
487: if (dp > 0.0) { /* Model says use the other direction! */
488: for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i * mfqP->npmax + j] *= -1;
489: }
490: /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */
491: for (j = 0; j < mfqP->n; j++) mfqP->work2[j] = mfqP->Gres[j];
492: PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1));
493: PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1));
494: if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) {
495: minindex = i;
496: minvalue = mfqP->work[i];
497: }
498: if (addallpoints != 0) addpoint(tao, mfqP, i);
499: }
500: if (!addallpoints) addpoint(tao, mfqP, minindex);
501: return 0;
502: }
504: static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c)
505: {
506: PetscInt i, j;
507: PetscBLASInt blasm = mfqP->m, blasj, blask, blasn = mfqP->n, ione = 1, info;
508: PetscBLASInt blasnpmax = mfqP->npmax, blasmaxmn;
509: PetscReal proj, normd;
510: const PetscReal *x;
512: for (i = mfqP->nHist - 1; i >= 0; i--) {
513: VecGetArrayRead(mfqP->Xhist[i], &x);
514: for (j = 0; j < mfqP->n; j++) mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta;
515: VecRestoreArrayRead(mfqP->Xhist[i], &x);
516: PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione));
517: PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione));
518: if (normd <= c) {
519: blasj = PetscMax((mfqP->n - mfqP->nmodelpoints), 0);
520: if (!mfqP->q_is_I) {
521: /* project D onto null */
522: blask = (mfqP->nmodelpoints);
523: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info));
525: }
526: PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione));
528: if (proj >= mfqP->theta1) { /* add this index to model */
529: mfqP->model_indices[mfqP->nmodelpoints] = i;
530: mfqP->nmodelpoints++;
531: PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione));
532: blask = mfqP->npmax * (mfqP->nmodelpoints);
533: PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione));
534: blask = mfqP->nmodelpoints;
535: blasmaxmn = PetscMax(mfqP->m, mfqP->n);
536: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info));
538: mfqP->q_is_I = 0;
539: }
540: if (mfqP->nmodelpoints == mfqP->n) break;
541: }
542: }
544: return 0;
545: }
547: static PetscErrorCode TaoSolve_POUNDERS(Tao tao)
548: {
549: TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
550: PetscInt i, ii, j, k, l;
551: PetscReal step = 1.0;
552: PetscInt low, high;
553: PetscReal minnorm;
554: PetscReal *x, *f;
555: const PetscReal *xmint, *fmin;
556: PetscReal deltaold;
557: PetscReal gnorm;
558: PetscBLASInt info, ione = 1, iblas;
559: PetscBool valid, same;
560: PetscReal mdec, rho, normxsp;
561: PetscReal one = 1.0, zero = 0.0, ratio;
562: PetscBLASInt blasm, blasn, blasncopy, blasnpmax;
563: static PetscBool set = PETSC_FALSE;
565: /* n = # of parameters
566: m = dimension (components) of function */
567: PetscCall(PetscCitationsRegister("@article{UNEDF0,\n"
568: "title = {Nuclear energy density optimization},\n"
569: "author = {Kortelainen, M. and Lesinski, T. and Mor\'e, J. and Nazarewicz, W.\n"
570: " and Sarich, J. and Schunck, N. and Stoitsov, M. V. and Wild, S. },\n"
571: "journal = {Phys. Rev. C},\n"
572: "volume = {82},\n"
573: "number = {2},\n"
574: "pages = {024313},\n"
575: "numpages = {18},\n"
576: "year = {2010},\n"
577: "month = {Aug},\n"
578: "doi = {10.1103/PhysRevC.82.024313}\n}\n",
579: &set));
580: tao->niter = 0;
581: if (tao->XL && tao->XU) {
582: /* Check x0 <= XU */
583: PetscReal val;
585: VecCopy(tao->solution, mfqP->Xhist[0]);
586: VecAXPY(mfqP->Xhist[0], -1.0, tao->XU);
587: VecMax(mfqP->Xhist[0], NULL, &val);
590: /* Check x0 >= xl */
591: VecCopy(tao->XL, mfqP->Xhist[0]);
592: VecAXPY(mfqP->Xhist[0], -1.0, tao->solution);
593: VecMax(mfqP->Xhist[0], NULL, &val);
596: /* Check x0 + delta < XU -- should be able to get around this eventually */
598: VecSet(mfqP->Xhist[0], mfqP->delta);
599: VecAXPY(mfqP->Xhist[0], 1.0, tao->solution);
600: VecAXPY(mfqP->Xhist[0], -1.0, tao->XU);
601: VecMax(mfqP->Xhist[0], NULL, &val);
603: }
605: blasm = mfqP->m;
606: blasn = mfqP->n;
607: blasnpmax = mfqP->npmax;
608: for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0;
610: VecCopy(tao->solution, mfqP->Xhist[0]);
612: /* This provides enough information to approximate the gradient of the objective */
613: /* using a forward difference scheme. */
615: PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta);
616: pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0]);
617: mfqP->minindex = 0;
618: minnorm = mfqP->Fres[0];
620: VecGetOwnershipRange(mfqP->Xhist[0], &low, &high);
621: for (i = 1; i < mfqP->n + 1; ++i) {
622: VecCopy(mfqP->Xhist[0], mfqP->Xhist[i]);
624: if (i - 1 >= low && i - 1 < high) {
625: VecGetArray(mfqP->Xhist[i], &x);
626: x[i - 1 - low] += mfqP->delta;
627: VecRestoreArray(mfqP->Xhist[i], &x);
628: }
629: CHKMEMQ;
630: pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i]);
631: if (mfqP->Fres[i] < minnorm) {
632: mfqP->minindex = i;
633: minnorm = mfqP->Fres[i];
634: }
635: }
636: VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution);
637: VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res);
638: PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm);
640: /* Gather mpi vecs to one big local vec */
642: /* Begin serial code */
644: /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
645: /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
646: /* (Column oriented for blas calls) */
647: ii = 0;
649: PetscInfo(tao, "Build matrix: %" PetscInt_FMT "\n", (PetscInt)mfqP->size);
650: if (1 == mfqP->size) {
651: VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint);
652: for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
653: VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint);
654: VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin);
655: for (i = 0; i < mfqP->n + 1; i++) {
656: if (i == mfqP->minindex) continue;
658: VecGetArray(mfqP->Xhist[i], &x);
659: for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
660: VecRestoreArray(mfqP->Xhist[i], &x);
662: VecGetArray(mfqP->Fhist[i], &f);
663: for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
664: VecRestoreArray(mfqP->Fhist[i], &f);
666: mfqP->model_indices[ii++] = i;
667: }
668: for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
669: VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin);
670: } else {
671: VecSet(mfqP->localxmin, 0);
672: VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD);
673: VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD);
675: VecGetArrayRead(mfqP->localxmin, &xmint);
676: for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
677: VecRestoreArrayRead(mfqP->localxmin, &xmint);
679: VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD);
680: VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD);
681: VecGetArrayRead(mfqP->localfmin, &fmin);
682: for (i = 0; i < mfqP->n + 1; i++) {
683: if (i == mfqP->minindex) continue;
685: VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD);
686: VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD);
687: VecGetArray(mfqP->localx, &x);
688: for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
689: VecRestoreArray(mfqP->localx, &x);
691: VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD);
692: VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD);
693: VecGetArray(mfqP->localf, &f);
694: for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
695: VecRestoreArray(mfqP->localf, &f);
697: mfqP->model_indices[ii++] = i;
698: }
699: for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
700: VecRestoreArrayRead(mfqP->localfmin, &fmin);
701: }
703: /* Determine the initial quadratic models */
704: /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */
705: /* D (nxn) Fdiff (nxm) => G (nxm) */
706: blasncopy = blasn;
707: PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info));
708: PetscInfo(tao, "Linear solve return: %" PetscInt_FMT "\n", (PetscInt)info);
710: pounders_update_res(tao);
712: valid = PETSC_TRUE;
714: VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES);
715: VecAssemblyBegin(tao->gradient);
716: VecAssemblyEnd(tao->gradient);
717: VecNorm(tao->gradient, NORM_2, &gnorm);
718: gnorm *= mfqP->delta;
719: VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution);
721: tao->reason = TAO_CONTINUE_ITERATING;
722: TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its);
723: TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step);
724: PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
726: mfqP->nHist = mfqP->n + 1;
727: mfqP->nmodelpoints = mfqP->n + 1;
728: PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm);
730: while (tao->reason == TAO_CONTINUE_ITERATING) {
731: PetscReal gnm = 1e-4;
732: /* Call general purpose update function */
733: PetscTryTypeMethod(tao, update, tao->niter, tao->user_update);
734: tao->niter++;
735: /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */
736: gqtwrap(tao, &gnm, &mdec);
737: /* Evaluate the function at the new point */
739: for (i = 0; i < mfqP->n; i++) mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i];
740: VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist]);
741: VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist]);
742: VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES);
743: VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]);
744: VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]);
746: pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]);
748: rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec;
749: mfqP->nHist++;
751: /* Update the center */
752: if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) {
753: /* Update model to reflect new base point */
754: for (i = 0; i < mfqP->n; i++) mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta;
755: for (j = 0; j < mfqP->m; j++) {
756: /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work';
757: G(:,j) = G(:,j) + H(:,:,j)*work' */
758: for (k = 0; k < mfqP->n; k++) {
759: mfqP->work2[k] = 0.0;
760: for (l = 0; l < mfqP->n; l++) mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l];
761: }
762: for (i = 0; i < mfqP->n; i++) {
763: mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]);
764: mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i];
765: }
766: }
767: /* Cres += work*Gres + .5*work*Hres*work';
768: Gres += Hres*work'; */
770: PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione));
771: for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] += mfqP->work2[i];
772: mfqP->minindex = mfqP->nHist - 1;
773: minnorm = mfqP->Fres[mfqP->minindex];
774: VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res);
775: /* Change current center */
776: VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint);
777: for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
778: VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint);
779: }
781: /* Evaluate at a model-improving point if necessary */
782: if (valid == PETSC_FALSE) {
783: mfqP->q_is_I = 1;
784: mfqP->nmodelpoints = 0;
785: affpoints(mfqP, mfqP->xmin, mfqP->c1);
786: if (mfqP->nmodelpoints < mfqP->n) {
787: PetscInfo(tao, "Model not valid -- model-improving\n");
788: modelimprove(tao, mfqP, 1);
789: }
790: }
792: /* Update the trust region radius */
793: deltaold = mfqP->delta;
794: normxsp = 0;
795: for (i = 0; i < mfqP->n; i++) normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i];
796: normxsp = PetscSqrtReal(normxsp);
797: if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) {
798: mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax);
799: } else if (valid == PETSC_TRUE) {
800: mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin);
801: }
803: /* Compute the next interpolation set */
804: mfqP->q_is_I = 1;
805: mfqP->nmodelpoints = 0;
806: PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1);
807: affpoints(mfqP, mfqP->xmin, mfqP->c1);
808: if (mfqP->nmodelpoints == mfqP->n) {
809: valid = PETSC_TRUE;
810: } else {
811: valid = PETSC_FALSE;
812: PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2);
813: affpoints(mfqP, mfqP->xmin, mfqP->c2);
814: if (mfqP->n > mfqP->nmodelpoints) {
815: PetscInfo(tao, "Model not valid -- adding geometry points\n");
816: modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints);
817: }
818: }
819: for (i = mfqP->nmodelpoints; i > 0; i--) mfqP->model_indices[i] = mfqP->model_indices[i - 1];
820: mfqP->nmodelpoints++;
821: mfqP->model_indices[0] = mfqP->minindex;
822: morepoints(mfqP);
823: for (i = 0; i < mfqP->nmodelpoints; i++) {
824: VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x);
825: for (j = 0; j < mfqP->n; j++) mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold;
826: VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x);
827: VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f);
828: for (j = 0; j < mfqP->m; j++) {
829: for (k = 0; k < mfqP->n; k++) {
830: mfqP->work[k] = 0.0;
831: for (l = 0; l < mfqP->n; l++) mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l];
832: }
833: PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]);
834: }
835: VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f);
836: }
838: /* Update the quadratic model */
839: PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints);
840: getquadpounders(mfqP);
841: VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin);
842: PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione));
843: /* G = G*(delta/deltaold) + Gdel */
844: ratio = mfqP->delta / deltaold;
845: iblas = blasm * blasn;
846: PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione));
847: PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione));
848: /* H = H*(delta/deltaold)^2 + Hdel */
849: iblas = blasm * blasn * blasn;
850: ratio *= ratio;
851: PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione));
852: PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione));
854: /* Get residuals */
855: pounders_update_res(tao);
857: /* Export solution and gradient residual to TAO */
858: VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution);
859: VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES);
860: VecAssemblyBegin(tao->gradient);
861: VecAssemblyEnd(tao->gradient);
862: VecNorm(tao->gradient, NORM_2, &gnorm);
863: gnorm *= mfqP->delta;
864: /* final criticality test */
865: TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its);
866: TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step);
867: PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
868: /* test for repeated model */
869: if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) {
870: same = PETSC_TRUE;
871: } else {
872: same = PETSC_FALSE;
873: }
874: for (i = 0; i < mfqP->nmodelpoints; i++) {
875: if (same) {
876: if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) {
877: same = PETSC_TRUE;
878: } else {
879: same = PETSC_FALSE;
880: }
881: }
882: mfqP->last_model_indices[i] = mfqP->model_indices[i];
883: }
884: mfqP->last_nmodelpoints = mfqP->nmodelpoints;
885: if (same && mfqP->delta == deltaold) {
886: PetscInfo(tao, "Identical model used in successive iterations\n");
887: tao->reason = TAO_CONVERGED_STEPTOL;
888: }
889: }
890: return 0;
891: }
893: static PetscErrorCode TaoSetUp_POUNDERS(Tao tao)
894: {
895: TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
896: PetscInt i, j;
897: IS isfloc, isfglob, isxloc, isxglob;
899: if (!tao->gradient) VecDuplicate(tao->solution, &tao->gradient);
900: if (!tao->stepdirection) VecDuplicate(tao->solution, &tao->stepdirection);
901: VecGetSize(tao->solution, &mfqP->n);
902: VecGetSize(tao->ls_res, &mfqP->m);
903: mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n);
904: if (mfqP->npmax == PETSC_DEFAULT) mfqP->npmax = 2 * mfqP->n + 1;
905: mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax);
906: mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2);
908: PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist);
909: PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist);
910: for (i = 0; i < mfqP->n + 1; i++) {
911: VecDuplicate(tao->solution, &mfqP->Xhist[i]);
912: VecDuplicate(tao->ls_res, &mfqP->Fhist[i]);
913: }
914: VecDuplicate(tao->solution, &mfqP->workxvec);
915: VecDuplicate(tao->ls_res, &mfqP->workfvec);
916: mfqP->nHist = 0;
918: PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres);
919: PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES);
920: PetscMalloc1(mfqP->n, &mfqP->work);
921: PetscMalloc1(mfqP->n, &mfqP->work2);
922: PetscMalloc1(mfqP->n, &mfqP->work3);
923: PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork);
924: PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega);
925: PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta);
926: PetscMalloc1(mfqP->n + 1, &mfqP->alpha);
928: PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H);
929: PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q);
930: PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp);
931: PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L);
932: PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp);
933: PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save);
934: PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N);
935: PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M);
936: PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z);
937: PetscMalloc1(mfqP->npmax, &mfqP->tau);
938: PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp);
939: mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2);
940: PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork);
941: PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork);
942: PetscMalloc1(mfqP->n, &mfqP->xmin);
943: PetscMalloc1(mfqP->m, &mfqP->C);
944: PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff);
945: PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp);
946: PetscMalloc1(mfqP->n, &mfqP->Gres);
947: PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres);
948: PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints);
949: PetscMalloc1(mfqP->npmax, &mfqP->model_indices);
950: PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices);
951: PetscMalloc1(mfqP->n, &mfqP->Xsubproblem);
952: PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel);
953: PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel);
954: PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices);
955: PetscMalloc1(mfqP->n, &mfqP->iwork);
956: PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w);
957: for (i = 0; i < mfqP->m; i++) {
958: for (j = 0; j < mfqP->m; j++) {
959: if (i == j) {
960: mfqP->w[i + mfqP->m * j] = 1.0;
961: } else {
962: mfqP->w[i + mfqP->m * j] = 0.0;
963: }
964: }
965: }
966: for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) mfqP->indices[i] = i;
967: MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size);
968: if (mfqP->size > 1) {
969: VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx);
970: VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin);
971: VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf);
972: VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin);
973: ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc);
974: ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob);
975: ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc);
976: ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob);
978: VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx);
979: VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf);
981: ISDestroy(&isxloc);
982: ISDestroy(&isxglob);
983: ISDestroy(&isfloc);
984: ISDestroy(&isfglob);
985: }
987: if (!mfqP->usegqt) {
988: KSP ksp;
989: PC pc;
990: VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx);
991: VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl);
992: VecDuplicate(mfqP->subxl, &mfqP->subb);
993: VecDuplicate(mfqP->subxl, &mfqP->subxu);
994: VecDuplicate(mfqP->subxl, &mfqP->subpdel);
995: VecDuplicate(mfqP->subxl, &mfqP->subndel);
996: TaoCreate(PETSC_COMM_SELF, &mfqP->subtao);
997: PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1);
998: TaoSetType(mfqP->subtao, TAOBNTR);
999: TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_");
1000: TaoSetSolution(mfqP->subtao, mfqP->subx);
1001: TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP);
1002: TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits);
1003: TaoSetFromOptions(mfqP->subtao);
1004: TaoGetKSP(mfqP->subtao, &ksp);
1005: if (ksp) {
1006: KSPGetPC(ksp, &pc);
1007: PCSetType(pc, PCNONE);
1008: }
1009: TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu);
1010: MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH);
1011: TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP);
1012: }
1013: return 0;
1014: }
1016: static PetscErrorCode TaoDestroy_POUNDERS(Tao tao)
1017: {
1018: TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1019: PetscInt i;
1021: if (!mfqP->usegqt) {
1022: TaoDestroy(&mfqP->subtao);
1023: VecDestroy(&mfqP->subx);
1024: VecDestroy(&mfqP->subxl);
1025: VecDestroy(&mfqP->subxu);
1026: VecDestroy(&mfqP->subb);
1027: VecDestroy(&mfqP->subpdel);
1028: VecDestroy(&mfqP->subndel);
1029: MatDestroy(&mfqP->subH);
1030: }
1031: PetscFree(mfqP->Fres);
1032: PetscFree(mfqP->RES);
1033: PetscFree(mfqP->work);
1034: PetscFree(mfqP->work2);
1035: PetscFree(mfqP->work3);
1036: PetscFree(mfqP->mwork);
1037: PetscFree(mfqP->omega);
1038: PetscFree(mfqP->beta);
1039: PetscFree(mfqP->alpha);
1040: PetscFree(mfqP->H);
1041: PetscFree(mfqP->Q);
1042: PetscFree(mfqP->Q_tmp);
1043: PetscFree(mfqP->L);
1044: PetscFree(mfqP->L_tmp);
1045: PetscFree(mfqP->L_save);
1046: PetscFree(mfqP->N);
1047: PetscFree(mfqP->M);
1048: PetscFree(mfqP->Z);
1049: PetscFree(mfqP->tau);
1050: PetscFree(mfqP->tau_tmp);
1051: PetscFree(mfqP->npmaxwork);
1052: PetscFree(mfqP->npmaxiwork);
1053: PetscFree(mfqP->xmin);
1054: PetscFree(mfqP->C);
1055: PetscFree(mfqP->Fdiff);
1056: PetscFree(mfqP->Disp);
1057: PetscFree(mfqP->Gres);
1058: PetscFree(mfqP->Hres);
1059: PetscFree(mfqP->Gpoints);
1060: PetscFree(mfqP->model_indices);
1061: PetscFree(mfqP->last_model_indices);
1062: PetscFree(mfqP->Xsubproblem);
1063: PetscFree(mfqP->Gdel);
1064: PetscFree(mfqP->Hdel);
1065: PetscFree(mfqP->indices);
1066: PetscFree(mfqP->iwork);
1067: PetscFree(mfqP->w);
1068: for (i = 0; i < mfqP->nHist; i++) {
1069: VecDestroy(&mfqP->Xhist[i]);
1070: VecDestroy(&mfqP->Fhist[i]);
1071: }
1072: VecDestroy(&mfqP->workxvec);
1073: VecDestroy(&mfqP->workfvec);
1074: PetscFree(mfqP->Xhist);
1075: PetscFree(mfqP->Fhist);
1077: if (mfqP->size > 1) {
1078: VecDestroy(&mfqP->localx);
1079: VecDestroy(&mfqP->localxmin);
1080: VecDestroy(&mfqP->localf);
1081: VecDestroy(&mfqP->localfmin);
1082: }
1083: PetscFree(tao->data);
1084: return 0;
1085: }
1087: static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems *PetscOptionsObject)
1088: {
1089: TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1091: PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization");
1092: PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL);
1093: mfqP->delta = mfqP->delta0;
1094: PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL);
1095: PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL);
1096: PetscOptionsHeadEnd();
1097: return 0;
1098: }
1100: static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer)
1101: {
1102: TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1103: PetscBool isascii;
1105: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
1106: if (isascii) {
1107: PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0);
1108: PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta);
1109: PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints);
1110: if (mfqP->usegqt) {
1111: PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n");
1112: } else {
1113: TaoView(mfqP->subtao, viewer);
1114: }
1115: }
1116: return 0;
1117: }
1118: /*MC
1119: TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares
1121: Options Database Keys:
1122: + -tao_pounders_delta - initial step length
1123: . -tao_pounders_npmax - maximum number of points in model
1124: - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON
1126: Level: beginner
1128: M*/
1130: PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao)
1131: {
1132: TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1134: tao->ops->setup = TaoSetUp_POUNDERS;
1135: tao->ops->solve = TaoSolve_POUNDERS;
1136: tao->ops->view = TaoView_POUNDERS;
1137: tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS;
1138: tao->ops->destroy = TaoDestroy_POUNDERS;
1140: PetscNew(&mfqP);
1141: tao->data = (void *)mfqP;
1142: /* Override default settings (unless already changed) */
1143: if (!tao->max_it_changed) tao->max_it = 2000;
1144: if (!tao->max_funcs_changed) tao->max_funcs = 4000;
1145: mfqP->npmax = PETSC_DEFAULT;
1146: mfqP->delta0 = 0.1;
1147: mfqP->delta = 0.1;
1148: mfqP->deltamax = 1e3;
1149: mfqP->deltamin = 1e-6;
1150: mfqP->c2 = 10.0;
1151: mfqP->theta1 = 1e-5;
1152: mfqP->theta2 = 1e-4;
1153: mfqP->gamma0 = 0.5;
1154: mfqP->gamma1 = 2.0;
1155: mfqP->eta0 = 0.0;
1156: mfqP->eta1 = 0.1;
1157: mfqP->usegqt = PETSC_FALSE;
1158: mfqP->gqt_rtol = 0.001;
1159: mfqP->gqt_maxits = 50;
1160: mfqP->workxvec = NULL;
1161: return 0;
1162: }