Actual source code: dt.c

  1: /* Discretization tools */

  3: #include <petscdt.h>
  4: #include <petscblaslapack.h>
  5: #include <petsc/private/petscimpl.h>
  6: #include <petsc/private/dtimpl.h>
  7: #include <petscviewer.h>
  8: #include <petscdmplex.h>
  9: #include <petscdmshell.h>

 11: #if defined(PETSC_HAVE_MPFR)
 12:   #include <mpfr.h>
 13: #endif

 15: const char *const        PetscDTNodeTypes_shifted[] = {"default", "gaussjacobi", "equispaced", "tanhsinh", "PETSCDTNODES_", NULL};
 16: const char *const *const PetscDTNodeTypes           = PetscDTNodeTypes_shifted + 1;

 18: const char *const        PetscDTSimplexQuadratureTypes_shifted[] = {"default", "conic", "minsym", "PETSCDTSIMPLEXQUAD_", NULL};
 19: const char *const *const PetscDTSimplexQuadratureTypes           = PetscDTSimplexQuadratureTypes_shifted + 1;

 21: static PetscBool GolubWelschCite       = PETSC_FALSE;
 22: const char       GolubWelschCitation[] = "@article{GolubWelsch1969,\n"
 23:                                          "  author  = {Golub and Welsch},\n"
 24:                                          "  title   = {Calculation of Quadrature Rules},\n"
 25:                                          "  journal = {Math. Comp.},\n"
 26:                                          "  volume  = {23},\n"
 27:                                          "  number  = {106},\n"
 28:                                          "  pages   = {221--230},\n"
 29:                                          "  year    = {1969}\n}\n";

 31: /* Numerical tests in src/dm/dt/tests/ex1.c show that when computing the nodes and weights of Gauss-Jacobi
 32:    quadrature rules:

 34:    - in double precision, Newton's method and Golub & Welsch both work for moderate degrees (< 100),
 35:    - in single precision, Newton's method starts producing incorrect roots around n = 15, but
 36:      the weights from Golub & Welsch become a problem before then: they produces errors
 37:      in computing the Jacobi-polynomial Gram matrix around n = 6.

 39:    So we default to Newton's method (required fewer dependencies) */
 40: PetscBool PetscDTGaussQuadratureNewton_Internal = PETSC_TRUE;

 42: PetscClassId PETSCQUADRATURE_CLASSID = 0;

 44: /*@
 45:   PetscQuadratureCreate - Create a `PetscQuadrature` object

 47:   Collective

 49:   Input Parameter:
 50: . comm - The communicator for the `PetscQuadrature` object

 52:   Output Parameter:
 53: . q  - The PetscQuadrature object

 55:   Level: beginner

 57: .seealso: `PetscQuadrature`, `Petscquadraturedestroy()`, `PetscQuadratureGetData()`
 58: @*/
 59: PetscErrorCode PetscQuadratureCreate(MPI_Comm comm, PetscQuadrature *q)
 60: {
 62:   DMInitializePackage();
 63:   PetscHeaderCreate(*q, PETSCQUADRATURE_CLASSID, "PetscQuadrature", "Quadrature", "DT", comm, PetscQuadratureDestroy, PetscQuadratureView);
 64:   (*q)->dim       = -1;
 65:   (*q)->Nc        = 1;
 66:   (*q)->order     = -1;
 67:   (*q)->numPoints = 0;
 68:   (*q)->points    = NULL;
 69:   (*q)->weights   = NULL;
 70:   return 0;
 71: }

 73: /*@
 74:   PetscQuadratureDuplicate - Create a deep copy of the `PetscQuadrature` object

 76:   Collective on q

 78:   Input Parameter:
 79: . q  - The `PetscQuadrature` object

 81:   Output Parameter:
 82: . r  - The new `PetscQuadrature` object

 84:   Level: beginner

 86: .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureDestroy()`, `PetscQuadratureGetData()`
 87: @*/
 88: PetscErrorCode PetscQuadratureDuplicate(PetscQuadrature q, PetscQuadrature *r)
 89: {
 90:   PetscInt         order, dim, Nc, Nq;
 91:   const PetscReal *points, *weights;
 92:   PetscReal       *p, *w;

 95:   PetscQuadratureCreate(PetscObjectComm((PetscObject)q), r);
 96:   PetscQuadratureGetOrder(q, &order);
 97:   PetscQuadratureSetOrder(*r, order);
 98:   PetscQuadratureGetData(q, &dim, &Nc, &Nq, &points, &weights);
 99:   PetscMalloc1(Nq * dim, &p);
100:   PetscMalloc1(Nq * Nc, &w);
101:   PetscArraycpy(p, points, Nq * dim);
102:   PetscArraycpy(w, weights, Nc * Nq);
103:   PetscQuadratureSetData(*r, dim, Nc, Nq, p, w);
104:   return 0;
105: }

107: /*@
108:   PetscQuadratureDestroy - Destroys a `PetscQuadrature` object

110:   Collective on q

112:   Input Parameter:
113: . q  - The `PetscQuadrature` object

115:   Level: beginner

117: .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureGetData()`
118: @*/
119: PetscErrorCode PetscQuadratureDestroy(PetscQuadrature *q)
120: {
121:   if (!*q) return 0;
123:   if (--((PetscObject)(*q))->refct > 0) {
124:     *q = NULL;
125:     return 0;
126:   }
127:   PetscFree((*q)->points);
128:   PetscFree((*q)->weights);
129:   PetscHeaderDestroy(q);
130:   return 0;
131: }

133: /*@
134:   PetscQuadratureGetOrder - Return the order of the method in the `PetscQuadrature`

136:   Not collective

138:   Input Parameter:
139: . q - The `PetscQuadrature` object

141:   Output Parameter:
142: . order - The order of the quadrature, i.e. the highest degree polynomial that is exactly integrated

144:   Level: intermediate

146: .seealso: `PetscQuadrature`, `PetscQuadratureSetOrder()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
147: @*/
148: PetscErrorCode PetscQuadratureGetOrder(PetscQuadrature q, PetscInt *order)
149: {
152:   *order = q->order;
153:   return 0;
154: }

156: /*@
157:   PetscQuadratureSetOrder - Set the order of the method in the `PetscQuadrature`

159:   Not collective

161:   Input Parameters:
162: + q - The `PetscQuadrature` object
163: - order - The order of the quadrature, i.e. the highest degree polynomial that is exactly integrated

165:   Level: intermediate

167: .seealso: `PetscQuadrature`, `PetscQuadratureGetOrder()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
168: @*/
169: PetscErrorCode PetscQuadratureSetOrder(PetscQuadrature q, PetscInt order)
170: {
172:   q->order = order;
173:   return 0;
174: }

176: /*@
177:   PetscQuadratureGetNumComponents - Return the number of components for functions to be integrated

179:   Not collective

181:   Input Parameter:
182: . q - The `PetscQuadrature` object

184:   Output Parameter:
185: . Nc - The number of components

187:   Note:
188:   We are performing an integral int f(x) . w(x) dx, where both f and w (the weight) have Nc components.

190:   Level: intermediate

192: .seealso: `PetscQuadrature`, `PetscQuadratureSetNumComponents()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
193: @*/
194: PetscErrorCode PetscQuadratureGetNumComponents(PetscQuadrature q, PetscInt *Nc)
195: {
198:   *Nc = q->Nc;
199:   return 0;
200: }

202: /*@
203:   PetscQuadratureSetNumComponents - Return the number of components for functions to be integrated

205:   Not collective

207:   Input Parameters:
208: + q  - The PetscQuadrature object
209: - Nc - The number of components

211:   Note:
212:   We are performing an integral int f(x) . w(x) dx, where both f and w (the weight) have Nc components.

214:   Level: intermediate

216: .seealso: `PetscQuadrature`, `PetscQuadratureGetNumComponents()`, `PetscQuadratureGetData()`, `PetscQuadratureSetData()`
217: @*/
218: PetscErrorCode PetscQuadratureSetNumComponents(PetscQuadrature q, PetscInt Nc)
219: {
221:   q->Nc = Nc;
222:   return 0;
223: }

225: /*@C
226:   PetscQuadratureGetData - Returns the data defining the `PetscQuadrature`

228:   Not collective

230:   Input Parameter:
231: . q  - The `PetscQuadrature` object

233:   Output Parameters:
234: + dim - The spatial dimension
235: . Nc - The number of components
236: . npoints - The number of quadrature points
237: . points - The coordinates of each quadrature point
238: - weights - The weight of each quadrature point

240:   Level: intermediate

242:   Fortran Note:
243:   From Fortran you must call `PetscQuadratureRestoreData()` when you are done with the data

245: .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureSetData()`
246: @*/
247: PetscErrorCode PetscQuadratureGetData(PetscQuadrature q, PetscInt *dim, PetscInt *Nc, PetscInt *npoints, const PetscReal *points[], const PetscReal *weights[])
248: {
250:   if (dim) {
252:     *dim = q->dim;
253:   }
254:   if (Nc) {
256:     *Nc = q->Nc;
257:   }
258:   if (npoints) {
260:     *npoints = q->numPoints;
261:   }
262:   if (points) {
264:     *points = q->points;
265:   }
266:   if (weights) {
268:     *weights = q->weights;
269:   }
270:   return 0;
271: }

273: /*@
274:   PetscQuadratureEqual - determine whether two quadratures are equivalent

276:   Input Parameters:
277: + A - A `PetscQuadrature` object
278: - B - Another `PetscQuadrature` object

280:   Output Parameters:
281: . equal - `PETSC_TRUE` if the quadratures are the same

283:   Level: intermediate

285: .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`
286: @*/
287: PetscErrorCode PetscQuadratureEqual(PetscQuadrature A, PetscQuadrature B, PetscBool *equal)
288: {
292:   *equal = PETSC_FALSE;
293:   if (A->dim != B->dim || A->Nc != B->Nc || A->order != B->order || A->numPoints != B->numPoints) return 0;
294:   for (PetscInt i = 0; i < A->numPoints * A->dim; i++) {
295:     if (PetscAbsReal(A->points[i] - B->points[i]) > PETSC_SMALL) return 0;
296:   }
297:   if (!A->weights && !B->weights) {
298:     *equal = PETSC_TRUE;
299:     return 0;
300:   }
301:   if (A->weights && B->weights) {
302:     for (PetscInt i = 0; i < A->numPoints; i++) {
303:       if (PetscAbsReal(A->weights[i] - B->weights[i]) > PETSC_SMALL) return 0;
304:     }
305:     *equal = PETSC_TRUE;
306:   }
307:   return 0;
308: }

310: static PetscErrorCode PetscDTJacobianInverse_Internal(PetscInt m, PetscInt n, const PetscReal J[], PetscReal Jinv[])
311: {
312:   PetscScalar *Js, *Jinvs;
313:   PetscInt     i, j, k;
314:   PetscBLASInt bm, bn, info;

316:   if (!m || !n) return 0;
317:   PetscBLASIntCast(m, &bm);
318:   PetscBLASIntCast(n, &bn);
319: #if defined(PETSC_USE_COMPLEX)
320:   PetscMalloc2(m * n, &Js, m * n, &Jinvs);
321:   for (i = 0; i < m * n; i++) Js[i] = J[i];
322: #else
323:   Js    = (PetscReal *)J;
324:   Jinvs = Jinv;
325: #endif
326:   if (m == n) {
327:     PetscBLASInt *pivots;
328:     PetscScalar  *W;

330:     PetscMalloc2(m, &pivots, m, &W);

332:     PetscArraycpy(Jinvs, Js, m * m);
333:     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bm, &bm, Jinvs, &bm, pivots, &info));
335:     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bm, Jinvs, &bm, pivots, W, &bm, &info));
337:     PetscFree2(pivots, W);
338:   } else if (m < n) {
339:     PetscScalar  *JJT;
340:     PetscBLASInt *pivots;
341:     PetscScalar  *W;

343:     PetscMalloc1(m * m, &JJT);
344:     PetscMalloc2(m, &pivots, m, &W);
345:     for (i = 0; i < m; i++) {
346:       for (j = 0; j < m; j++) {
347:         PetscScalar val = 0.;

349:         for (k = 0; k < n; k++) val += Js[i * n + k] * Js[j * n + k];
350:         JJT[i * m + j] = val;
351:       }
352:     }

354:     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bm, &bm, JJT, &bm, pivots, &info));
356:     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bm, JJT, &bm, pivots, W, &bm, &info));
358:     for (i = 0; i < n; i++) {
359:       for (j = 0; j < m; j++) {
360:         PetscScalar val = 0.;

362:         for (k = 0; k < m; k++) val += Js[k * n + i] * JJT[k * m + j];
363:         Jinvs[i * m + j] = val;
364:       }
365:     }
366:     PetscFree2(pivots, W);
367:     PetscFree(JJT);
368:   } else {
369:     PetscScalar  *JTJ;
370:     PetscBLASInt *pivots;
371:     PetscScalar  *W;

373:     PetscMalloc1(n * n, &JTJ);
374:     PetscMalloc2(n, &pivots, n, &W);
375:     for (i = 0; i < n; i++) {
376:       for (j = 0; j < n; j++) {
377:         PetscScalar val = 0.;

379:         for (k = 0; k < m; k++) val += Js[k * n + i] * Js[k * n + j];
380:         JTJ[i * n + j] = val;
381:       }
382:     }

384:     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bn, &bn, JTJ, &bn, pivots, &info));
386:     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bn, JTJ, &bn, pivots, W, &bn, &info));
388:     for (i = 0; i < n; i++) {
389:       for (j = 0; j < m; j++) {
390:         PetscScalar val = 0.;

392:         for (k = 0; k < n; k++) val += JTJ[i * n + k] * Js[j * n + k];
393:         Jinvs[i * m + j] = val;
394:       }
395:     }
396:     PetscFree2(pivots, W);
397:     PetscFree(JTJ);
398:   }
399: #if defined(PETSC_USE_COMPLEX)
400:   for (i = 0; i < m * n; i++) Jinv[i] = PetscRealPart(Jinvs[i]);
401:   PetscFree2(Js, Jinvs);
402: #endif
403:   return 0;
404: }

406: /*@
407:    PetscQuadraturePushForward - Push forward a quadrature functional under an affine transformation.

409:    Collecive on `PetscQuadrature`

411:    Input Parameters:
412: +  q - the quadrature functional
413: .  imageDim - the dimension of the image of the transformation
414: .  origin - a point in the original space
415: .  originImage - the image of the origin under the transformation
416: .  J - the Jacobian of the image: an [imageDim x dim] matrix in row major order
417: -  formDegree - transform the quadrature weights as k-forms of this form degree (if the number of components is a multiple of (dim choose formDegree), it is assumed that they represent multiple k-forms) [see `PetscDTAltVPullback()` for interpretation of formDegree]

419:    Output Parameters:
420: .  Jinvstarq - a quadrature rule where each point is the image of a point in the original quadrature rule, and where the k-form weights have been pulled-back by the pseudoinverse of J to the k-form weights in the image space.

422:    Level: intermediate

424:    Note:
425:    The new quadrature rule will have a different number of components if spaces have different dimensions.  For example, pushing a 2-form forward from a two dimensional space to a three dimensional space changes the number of components from 1 to 3.

427: .seealso: `PetscQuadrature`, `PetscDTAltVPullback()`, `PetscDTAltVPullbackMatrix()`
428: @*/
429: PetscErrorCode PetscQuadraturePushForward(PetscQuadrature q, PetscInt imageDim, const PetscReal origin[], const PetscReal originImage[], const PetscReal J[], PetscInt formDegree, PetscQuadrature *Jinvstarq)
430: {
431:   PetscInt         dim, Nc, imageNc, formSize, Ncopies, imageFormSize, Npoints, pt, i, j, c;
432:   const PetscReal *points;
433:   const PetscReal *weights;
434:   PetscReal       *imagePoints, *imageWeights;
435:   PetscReal       *Jinv;
436:   PetscReal       *Jinvstar;

440:   PetscQuadratureGetData(q, &dim, &Nc, &Npoints, &points, &weights);
441:   PetscDTBinomialInt(dim, PetscAbsInt(formDegree), &formSize);
443:   Ncopies = Nc / formSize;
444:   PetscDTBinomialInt(imageDim, PetscAbsInt(formDegree), &imageFormSize);
445:   imageNc = Ncopies * imageFormSize;
446:   PetscMalloc1(Npoints * imageDim, &imagePoints);
447:   PetscMalloc1(Npoints * imageNc, &imageWeights);
448:   PetscMalloc2(imageDim * dim, &Jinv, formSize * imageFormSize, &Jinvstar);
449:   PetscDTJacobianInverse_Internal(imageDim, dim, J, Jinv);
450:   PetscDTAltVPullbackMatrix(imageDim, dim, Jinv, formDegree, Jinvstar);
451:   for (pt = 0; pt < Npoints; pt++) {
452:     const PetscReal *point      = &points[pt * dim];
453:     PetscReal       *imagePoint = &imagePoints[pt * imageDim];

455:     for (i = 0; i < imageDim; i++) {
456:       PetscReal val = originImage[i];

458:       for (j = 0; j < dim; j++) val += J[i * dim + j] * (point[j] - origin[j]);
459:       imagePoint[i] = val;
460:     }
461:     for (c = 0; c < Ncopies; c++) {
462:       const PetscReal *form      = &weights[pt * Nc + c * formSize];
463:       PetscReal       *imageForm = &imageWeights[pt * imageNc + c * imageFormSize];

465:       for (i = 0; i < imageFormSize; i++) {
466:         PetscReal val = 0.;

468:         for (j = 0; j < formSize; j++) val += Jinvstar[i * formSize + j] * form[j];
469:         imageForm[i] = val;
470:       }
471:     }
472:   }
473:   PetscQuadratureCreate(PetscObjectComm((PetscObject)q), Jinvstarq);
474:   PetscQuadratureSetData(*Jinvstarq, imageDim, imageNc, Npoints, imagePoints, imageWeights);
475:   PetscFree2(Jinv, Jinvstar);
476:   return 0;
477: }

479: /*@C
480:   PetscQuadratureSetData - Sets the data defining the quadrature

482:   Not collective

484:   Input Parameters:
485: + q  - The `PetscQuadrature` object
486: . dim - The spatial dimension
487: . Nc - The number of components
488: . npoints - The number of quadrature points
489: . points - The coordinates of each quadrature point
490: - weights - The weight of each quadrature point

492:   Level: intermediate

494:   Note:
495:   This routine owns the references to points and weights, so they must be allocated using `PetscMalloc()` and the user should not free them.

497: .seealso: `PetscQuadrature`, `PetscQuadratureCreate()`, `PetscQuadratureGetData()`
498: @*/
499: PetscErrorCode PetscQuadratureSetData(PetscQuadrature q, PetscInt dim, PetscInt Nc, PetscInt npoints, const PetscReal points[], const PetscReal weights[])
500: {
502:   if (dim >= 0) q->dim = dim;
503:   if (Nc >= 0) q->Nc = Nc;
504:   if (npoints >= 0) q->numPoints = npoints;
505:   if (points) {
507:     q->points = points;
508:   }
509:   if (weights) {
511:     q->weights = weights;
512:   }
513:   return 0;
514: }

516: static PetscErrorCode PetscQuadratureView_Ascii(PetscQuadrature quad, PetscViewer v)
517: {
518:   PetscInt          q, d, c;
519:   PetscViewerFormat format;

521:   if (quad->Nc > 1) PetscViewerASCIIPrintf(v, "Quadrature of order %" PetscInt_FMT " on %" PetscInt_FMT " points (dim %" PetscInt_FMT ") with %" PetscInt_FMT " components\n", quad->order, quad->numPoints, quad->dim, quad->Nc);
522:   else PetscViewerASCIIPrintf(v, "Quadrature of order %" PetscInt_FMT " on %" PetscInt_FMT " points (dim %" PetscInt_FMT ")\n", quad->order, quad->numPoints, quad->dim);
523:   PetscViewerGetFormat(v, &format);
524:   if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) return 0;
525:   for (q = 0; q < quad->numPoints; ++q) {
526:     PetscViewerASCIIPrintf(v, "p%" PetscInt_FMT " (", q);
527:     PetscViewerASCIIUseTabs(v, PETSC_FALSE);
528:     for (d = 0; d < quad->dim; ++d) {
529:       if (d) PetscViewerASCIIPrintf(v, ", ");
530:       PetscViewerASCIIPrintf(v, "%+g", (double)quad->points[q * quad->dim + d]);
531:     }
532:     PetscViewerASCIIPrintf(v, ") ");
533:     if (quad->Nc > 1) PetscViewerASCIIPrintf(v, "w%" PetscInt_FMT " (", q);
534:     for (c = 0; c < quad->Nc; ++c) {
535:       if (c) PetscViewerASCIIPrintf(v, ", ");
536:       PetscViewerASCIIPrintf(v, "%+g", (double)quad->weights[q * quad->Nc + c]);
537:     }
538:     if (quad->Nc > 1) PetscViewerASCIIPrintf(v, ")");
539:     PetscViewerASCIIPrintf(v, "\n");
540:     PetscViewerASCIIUseTabs(v, PETSC_TRUE);
541:   }
542:   return 0;
543: }

545: /*@C
546:   PetscQuadratureView - View a `PetscQuadrature` object

548:   Collective on quad

550:   Input Parameters:
551: + quad  - The `PetscQuadrature` object
552: - viewer - The `PetscViewer` object

554:   Level: beginner

556: .seealso: `PetscQuadrature`, `PetscViewer`, `PetscQuadratureCreate()`, `PetscQuadratureGetData()`
557: @*/
558: PetscErrorCode PetscQuadratureView(PetscQuadrature quad, PetscViewer viewer)
559: {
560:   PetscBool iascii;

564:   if (!viewer) PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)quad), &viewer);
565:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
566:   PetscViewerASCIIPushTab(viewer);
567:   if (iascii) PetscQuadratureView_Ascii(quad, viewer);
568:   PetscViewerASCIIPopTab(viewer);
569:   return 0;
570: }

572: /*@C
573:   PetscQuadratureExpandComposite - Return a quadrature over the composite element, which has the original quadrature in each subelement

575:   Not collective

577:   Input Parameters:
578: + q - The original `PetscQuadrature`
579: . numSubelements - The number of subelements the original element is divided into
580: . v0 - An array of the initial points for each subelement
581: - jac - An array of the Jacobian mappings from the reference to each subelement

583:   Output Parameters:
584: . dim - The dimension

586:   Note:
587:   Together v0 and jac define an affine mapping from the original reference element to each subelement

589:   Fortran Note:
590:   Not available from Fortran

592:   Level: intermediate

594: .seealso: `PetscQuadrature`, `PetscFECreate()`, `PetscSpaceGetDimension()`, `PetscDualSpaceGetDimension()`
595: @*/
596: PetscErrorCode PetscQuadratureExpandComposite(PetscQuadrature q, PetscInt numSubelements, const PetscReal v0[], const PetscReal jac[], PetscQuadrature *qref)
597: {
598:   const PetscReal *points, *weights;
599:   PetscReal       *pointsRef, *weightsRef;
600:   PetscInt         dim, Nc, order, npoints, npointsRef, c, p, cp, d, e;

606:   PetscQuadratureCreate(PETSC_COMM_SELF, qref);
607:   PetscQuadratureGetOrder(q, &order);
608:   PetscQuadratureGetData(q, &dim, &Nc, &npoints, &points, &weights);
609:   npointsRef = npoints * numSubelements;
610:   PetscMalloc1(npointsRef * dim, &pointsRef);
611:   PetscMalloc1(npointsRef * Nc, &weightsRef);
612:   for (c = 0; c < numSubelements; ++c) {
613:     for (p = 0; p < npoints; ++p) {
614:       for (d = 0; d < dim; ++d) {
615:         pointsRef[(c * npoints + p) * dim + d] = v0[c * dim + d];
616:         for (e = 0; e < dim; ++e) pointsRef[(c * npoints + p) * dim + d] += jac[(c * dim + d) * dim + e] * (points[p * dim + e] + 1.0);
617:       }
618:       /* Could also use detJ here */
619:       for (cp = 0; cp < Nc; ++cp) weightsRef[(c * npoints + p) * Nc + cp] = weights[p * Nc + cp] / numSubelements;
620:     }
621:   }
622:   PetscQuadratureSetOrder(*qref, order);
623:   PetscQuadratureSetData(*qref, dim, Nc, npointsRef, pointsRef, weightsRef);
624:   return 0;
625: }

627: /* Compute the coefficients for the Jacobi polynomial recurrence,
628:  *
629:  * J^{a,b}_n(x) = (cnm1 + cnm1x * x) * J^{a,b}_{n-1}(x) - cnm2 * J^{a,b}_{n-2}(x).
630:  */
631: #define PetscDTJacobiRecurrence_Internal(n, a, b, cnm1, cnm1x, cnm2) \
632:   do { \
633:     PetscReal _a = (a); \
634:     PetscReal _b = (b); \
635:     PetscReal _n = (n); \
636:     if (n == 1) { \
637:       (cnm1)  = (_a - _b) * 0.5; \
638:       (cnm1x) = (_a + _b + 2.) * 0.5; \
639:       (cnm2)  = 0.; \
640:     } else { \
641:       PetscReal _2n  = _n + _n; \
642:       PetscReal _d   = (_2n * (_n + _a + _b) * (_2n + _a + _b - 2)); \
643:       PetscReal _n1  = (_2n + _a + _b - 1.) * (_a * _a - _b * _b); \
644:       PetscReal _n1x = (_2n + _a + _b - 1.) * (_2n + _a + _b) * (_2n + _a + _b - 2); \
645:       PetscReal _n2  = 2. * ((_n + _a - 1.) * (_n + _b - 1.) * (_2n + _a + _b)); \
646:       (cnm1)         = _n1 / _d; \
647:       (cnm1x)        = _n1x / _d; \
648:       (cnm2)         = _n2 / _d; \
649:     } \
650:   } while (0)

652: /*@
653:   PetscDTJacobiNorm - Compute the weighted L2 norm of a Jacobi polynomial.

655:   $\| P^{\alpha,\beta}_n \|_{\alpha,\beta}^2 = \int_{-1}^1 (1 + x)^{\alpha} (1 - x)^{\beta} P^{\alpha,\beta}_n (x)^2 dx.$

657:   Input Parameters:
658: - alpha - the left exponent > -1
659: . beta - the right exponent > -1
660: + n - the polynomial degree

662:   Output Parameter:
663: . norm - the weighted L2 norm

665:   Level: beginner

667: .seealso: `PetscQuadrature`, `PetscDTJacobiEval()`
668: @*/
669: PetscErrorCode PetscDTJacobiNorm(PetscReal alpha, PetscReal beta, PetscInt n, PetscReal *norm)
670: {
671:   PetscReal twoab1;
672:   PetscReal gr;

677:   twoab1 = PetscPowReal(2., alpha + beta + 1.);
678: #if defined(PETSC_HAVE_LGAMMA)
679:   if (!n) {
680:     gr = PetscExpReal(PetscLGamma(alpha + 1.) + PetscLGamma(beta + 1.) - PetscLGamma(alpha + beta + 2.));
681:   } else {
682:     gr = PetscExpReal(PetscLGamma(n + alpha + 1.) + PetscLGamma(n + beta + 1.) - (PetscLGamma(n + 1.) + PetscLGamma(n + alpha + beta + 1.))) / (n + n + alpha + beta + 1.);
683:   }
684: #else
685:   {
686:     PetscInt alphai = (PetscInt)alpha;
687:     PetscInt betai  = (PetscInt)beta;
688:     PetscInt i;

690:     gr = n ? (1. / (n + n + alpha + beta + 1.)) : 1.;
691:     if ((PetscReal)alphai == alpha) {
692:       if (!n) {
693:         for (i = 0; i < alphai; i++) gr *= (i + 1.) / (beta + i + 1.);
694:         gr /= (alpha + beta + 1.);
695:       } else {
696:         for (i = 0; i < alphai; i++) gr *= (n + i + 1.) / (n + beta + i + 1.);
697:       }
698:     } else if ((PetscReal)betai == beta) {
699:       if (!n) {
700:         for (i = 0; i < betai; i++) gr *= (i + 1.) / (alpha + i + 2.);
701:         gr /= (alpha + beta + 1.);
702:       } else {
703:         for (i = 0; i < betai; i++) gr *= (n + i + 1.) / (n + alpha + i + 1.);
704:       }
705:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "lgamma() - math routine is unavailable.");
706:   }
707: #endif
708:   *norm = PetscSqrtReal(twoab1 * gr);
709:   return 0;
710: }

712: static PetscErrorCode PetscDTJacobiEval_Internal(PetscInt npoints, PetscReal a, PetscReal b, PetscInt k, const PetscReal *points, PetscInt ndegree, const PetscInt *degrees, PetscReal *p)
713: {
714:   PetscReal ak, bk;
715:   PetscReal abk1;
716:   PetscInt  i, l, maxdegree;

718:   maxdegree = degrees[ndegree - 1] - k;
719:   ak        = a + k;
720:   bk        = b + k;
721:   abk1      = a + b + k + 1.;
722:   if (maxdegree < 0) {
723:     for (i = 0; i < npoints; i++)
724:       for (l = 0; l < ndegree; l++) p[i * ndegree + l] = 0.;
725:     return 0;
726:   }
727:   for (i = 0; i < npoints; i++) {
728:     PetscReal pm1, pm2, x;
729:     PetscReal cnm1, cnm1x, cnm2;
730:     PetscInt  j, m;

732:     x   = points[i];
733:     pm2 = 1.;
734:     PetscDTJacobiRecurrence_Internal(1, ak, bk, cnm1, cnm1x, cnm2);
735:     pm1 = (cnm1 + cnm1x * x);
736:     l   = 0;
737:     while (l < ndegree && degrees[l] - k < 0) p[l++] = 0.;
738:     while (l < ndegree && degrees[l] - k == 0) {
739:       p[l] = pm2;
740:       for (m = 0; m < k; m++) p[l] *= (abk1 + m) * 0.5;
741:       l++;
742:     }
743:     while (l < ndegree && degrees[l] - k == 1) {
744:       p[l] = pm1;
745:       for (m = 0; m < k; m++) p[l] *= (abk1 + 1 + m) * 0.5;
746:       l++;
747:     }
748:     for (j = 2; j <= maxdegree; j++) {
749:       PetscReal pp;

751:       PetscDTJacobiRecurrence_Internal(j, ak, bk, cnm1, cnm1x, cnm2);
752:       pp  = (cnm1 + cnm1x * x) * pm1 - cnm2 * pm2;
753:       pm2 = pm1;
754:       pm1 = pp;
755:       while (l < ndegree && degrees[l] - k == j) {
756:         p[l] = pp;
757:         for (m = 0; m < k; m++) p[l] *= (abk1 + j + m) * 0.5;
758:         l++;
759:       }
760:     }
761:     p += ndegree;
762:   }
763:   return 0;
764: }

766: /*@
767:   PetscDTJacobiEvalJet - Evaluate the jet (function and derivatives) of the Jacobi polynomials basis up to a given degree.
768:   The Jacobi polynomials with indices $\alpha$ and $\beta$ are orthogonal with respect to the weighted inner product
769:   $\langle f, g \rangle = \int_{-1}^1 (1+x)^{\alpha} (1-x)^{\beta} f(x) g(x) dx$.

771:   Input Parameters:
772: + alpha - the left exponent of the weight
773: . beta - the right exponetn of the weight
774: . npoints - the number of points to evaluate the polynomials at
775: . points - [npoints] array of point coordinates
776: . degree - the maximm degree polynomial space to evaluate, (degree + 1) will be evaluated total.
777: - k - the maximum derivative to evaluate in the jet, (k + 1) will be evaluated total.

779:   Output Parameters:
780: - p - an array containing the evaluations of the Jacobi polynomials's jets on the points.  the size is (degree + 1) x
781:   (k + 1) x npoints, which also describes the order of the dimensions of this three-dimensional array: the first
782:   (slowest varying) dimension is polynomial degree; the second dimension is derivative order; the third (fastest
783:   varying) dimension is the index of the evaluation point.

785:   Level: advanced

787: .seealso: `PetscDTJacobiEval()`, `PetscDTPKDEvalJet()`
788: @*/
789: PetscErrorCode PetscDTJacobiEvalJet(PetscReal alpha, PetscReal beta, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt k, PetscReal p[])
790: {
791:   PetscInt   i, j, l;
792:   PetscInt  *degrees;
793:   PetscReal *psingle;

795:   if (degree == 0) {
796:     PetscInt zero = 0;

798:     for (i = 0; i <= k; i++) PetscDTJacobiEval_Internal(npoints, alpha, beta, i, points, 1, &zero, &p[i * npoints]);
799:     return 0;
800:   }
801:   PetscMalloc1(degree + 1, &degrees);
802:   PetscMalloc1((degree + 1) * npoints, &psingle);
803:   for (i = 0; i <= degree; i++) degrees[i] = i;
804:   for (i = 0; i <= k; i++) {
805:     PetscDTJacobiEval_Internal(npoints, alpha, beta, i, points, degree + 1, degrees, psingle);
806:     for (j = 0; j <= degree; j++) {
807:       for (l = 0; l < npoints; l++) p[(j * (k + 1) + i) * npoints + l] = psingle[l * (degree + 1) + j];
808:     }
809:   }
810:   PetscFree(psingle);
811:   PetscFree(degrees);
812:   return 0;
813: }

815: /*@
816:    PetscDTJacobiEval - evaluate Jacobi polynomials for the weight function $(1.+x)^{\alpha} (1.-x)^{\beta}$ at a set of points
817:                        at points

819:    Not Collective

821:    Input Parameters:
822: +  npoints - number of spatial points to evaluate at
823: .  alpha - the left exponent > -1
824: .  beta - the right exponent > -1
825: .  points - array of locations to evaluate at
826: .  ndegree - number of basis degrees to evaluate
827: -  degrees - sorted array of degrees to evaluate

829:    Output Parameters:
830: +  B - row-oriented basis evaluation matrix B[point*ndegree + degree] (dimension npoints*ndegrees, allocated by caller) (or NULL)
831: .  D - row-oriented derivative evaluation matrix (or NULL)
832: -  D2 - row-oriented second derivative evaluation matrix (or NULL)

834:    Level: intermediate

836: .seealso: `PetscDTGaussQuadrature()`, `PetscDTLegendreEval()`
837: @*/
838: PetscErrorCode PetscDTJacobiEval(PetscInt npoints, PetscReal alpha, PetscReal beta, const PetscReal *points, PetscInt ndegree, const PetscInt *degrees, PetscReal *B, PetscReal *D, PetscReal *D2)
839: {
842:   if (!npoints || !ndegree) return 0;
843:   if (B) PetscDTJacobiEval_Internal(npoints, alpha, beta, 0, points, ndegree, degrees, B);
844:   if (D) PetscDTJacobiEval_Internal(npoints, alpha, beta, 1, points, ndegree, degrees, D);
845:   if (D2) PetscDTJacobiEval_Internal(npoints, alpha, beta, 2, points, ndegree, degrees, D2);
846:   return 0;
847: }

849: /*@
850:    PetscDTLegendreEval - evaluate Legendre polynomials at points

852:    Not Collective

854:    Input Parameters:
855: +  npoints - number of spatial points to evaluate at
856: .  points - array of locations to evaluate at
857: .  ndegree - number of basis degrees to evaluate
858: -  degrees - sorted array of degrees to evaluate

860:    Output Parameters:
861: +  B - row-oriented basis evaluation matrix B[point*ndegree + degree] (dimension npoints*ndegrees, allocated by caller) (or NULL)
862: .  D - row-oriented derivative evaluation matrix (or NULL)
863: -  D2 - row-oriented second derivative evaluation matrix (or NULL)

865:    Level: intermediate

867: .seealso: `PetscDTGaussQuadrature()`
868: @*/
869: PetscErrorCode PetscDTLegendreEval(PetscInt npoints, const PetscReal *points, PetscInt ndegree, const PetscInt *degrees, PetscReal *B, PetscReal *D, PetscReal *D2)
870: {
871:   PetscDTJacobiEval(npoints, 0., 0., points, ndegree, degrees, B, D, D2);
872:   return 0;
873: }

875: /*@
876:   PetscDTIndexToGradedOrder - convert an index into a tuple of monomial degrees in a graded order (that is, if the degree sum of tuple x is less than the degree sum of tuple y, then the index of x is smaller than the index of y)

878:   Input Parameters:
879: + len - the desired length of the degree tuple
880: - index - the index to convert: should be >= 0

882:   Output Parameter:
883: . degtup - will be filled with a tuple of degrees

885:   Level: beginner

887:   Note:
888:   For two tuples x and y with the same degree sum, partial degree sums over the final elements of the tuples
889:   acts as a tiebreaker.  For example, (2, 1, 1) and (1, 2, 1) have the same degree sum, but the degree sum over the
890:   last two elements is smaller for the former, so (2, 1, 1) < (1, 2, 1).

892: .seealso: `PetscDTGradedOrderToIndex()`
893: @*/
894: PetscErrorCode PetscDTIndexToGradedOrder(PetscInt len, PetscInt index, PetscInt degtup[])
895: {
896:   PetscInt i, total;
897:   PetscInt sum;

902:   total = 1;
903:   sum   = 0;
904:   while (index >= total) {
905:     index -= total;
906:     total = (total * (len + sum)) / (sum + 1);
907:     sum++;
908:   }
909:   for (i = 0; i < len; i++) {
910:     PetscInt c;

912:     degtup[i] = sum;
913:     for (c = 0, total = 1; c < sum; c++) {
914:       /* going into the loop, total is the number of way to have a tuple of sum exactly c with length len - 1 - i */
915:       if (index < total) break;
916:       index -= total;
917:       total = (total * (len - 1 - i + c)) / (c + 1);
918:       degtup[i]--;
919:     }
920:     sum -= degtup[i];
921:   }
922:   return 0;
923: }

925: /*@
926:   PetscDTGradedOrderToIndex - convert a tuple into an index in a graded order, the inverse of `PetscDTIndexToGradedOrder()`.

928:   Input Parameters:
929: + len - the length of the degree tuple
930: - degtup - tuple with this length

932:   Output Parameter:
933: . index - index in graded order: >= 0

935:   Level: Beginner

937:   Note:
938:   For two tuples x and y with the same degree sum, partial degree sums over the final elements of the tuples
939:   acts as a tiebreaker.  For example, (2, 1, 1) and (1, 2, 1) have the same degree sum, but the degree sum over the
940:   last two elements is smaller for the former, so (2, 1, 1) < (1, 2, 1).

942: .seealso: `PetscDTIndexToGradedOrder()`
943: @*/
944: PetscErrorCode PetscDTGradedOrderToIndex(PetscInt len, const PetscInt degtup[], PetscInt *index)
945: {
946:   PetscInt i, idx, sum, total;

950:   for (i = 0, sum = 0; i < len; i++) sum += degtup[i];
951:   idx   = 0;
952:   total = 1;
953:   for (i = 0; i < sum; i++) {
954:     idx += total;
955:     total = (total * (len + i)) / (i + 1);
956:   }
957:   for (i = 0; i < len - 1; i++) {
958:     PetscInt c;

960:     total = 1;
961:     sum -= degtup[i];
962:     for (c = 0; c < sum; c++) {
963:       idx += total;
964:       total = (total * (len - 1 - i + c)) / (c + 1);
965:     }
966:   }
967:   *index = idx;
968:   return 0;
969: }

971: static PetscBool PKDCite       = PETSC_FALSE;
972: const char       PKDCitation[] = "@article{Kirby2010,\n"
973:                                  "  title={Singularity-free evaluation of collapsed-coordinate orthogonal polynomials},\n"
974:                                  "  author={Kirby, Robert C},\n"
975:                                  "  journal={ACM Transactions on Mathematical Software (TOMS)},\n"
976:                                  "  volume={37},\n"
977:                                  "  number={1},\n"
978:                                  "  pages={1--16},\n"
979:                                  "  year={2010},\n"
980:                                  "  publisher={ACM New York, NY, USA}\n}\n";

982: /*@
983:   PetscDTPKDEvalJet - Evaluate the jet (function and derivatives) of the Proriol-Koornwinder-Dubiner (PKD) basis for
984:   the space of polynomials up to a given degree.  The PKD basis is L2-orthonormal on the biunit simplex (which is used
985:   as the reference element for finite elements in PETSc), which makes it a stable basis to use for evaluating
986:   polynomials in that domain.

988:   Input Parameters:
989: + dim - the number of variables in the multivariate polynomials
990: . npoints - the number of points to evaluate the polynomials at
991: . points - [npoints x dim] array of point coordinates
992: . degree - the degree (sum of degrees on the variables in a monomial) of the polynomial space to evaluate.  There are ((dim + degree) choose dim) polynomials in this space.
993: - k - the maximum order partial derivative to evaluate in the jet.  There are (dim + k choose dim) partial derivatives
994:   in the jet.  Choosing k = 0 means to evaluate just the function and no derivatives

996:   Output Parameters:
997: - p - an array containing the evaluations of the PKD polynomials' jets on the points.  The size is ((dim + degree)
998:   choose dim) x ((dim + k) choose dim) x npoints, which also describes the order of the dimensions of this
999:   three-dimensional array: the first (slowest varying) dimension is basis function index; the second dimension is jet
1000:   index; the third (fastest varying) dimension is the index of the evaluation point.

1002:   Level: advanced

1004:   Notes:
1005:   The ordering of the basis functions, and the ordering of the derivatives in the jet, both follow the graded
1006:   ordering of `PetscDTIndexToGradedOrder()` and `PetscDTGradedOrderToIndex()`.  For example, in 3D, the polynomial with
1007:   leading monomial x^2,y^0,z^1, which has degree tuple (2,0,1), which by `PetscDTGradedOrderToIndex()` has index 12 (it is the 13th basis function in the space);
1008:   the partial derivative $\partial_x \partial_z$ has order tuple (1,0,1), appears at index 6 in the jet (it is the 7th partial derivative in the jet).

1010:   The implementation uses Kirby's singularity-free evaluation algorithm, https://doi.org/10.1145/1644001.1644006.

1012: .seealso: `PetscDTGradedOrderToIndex()`, `PetscDTIndexToGradedOrder()`, `PetscDTJacobiEvalJet()`
1013: @*/
1014: PetscErrorCode PetscDTPKDEvalJet(PetscInt dim, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt k, PetscReal p[])
1015: {
1016:   PetscInt   degidx, kidx, d, pt;
1017:   PetscInt   Nk, Ndeg;
1018:   PetscInt  *ktup, *degtup;
1019:   PetscReal *scales, initscale, scaleexp;

1021:   PetscCitationsRegister(PKDCitation, &PKDCite);
1022:   PetscDTBinomialInt(dim + k, k, &Nk);
1023:   PetscDTBinomialInt(degree + dim, degree, &Ndeg);
1024:   PetscMalloc2(dim, &degtup, dim, &ktup);
1025:   PetscMalloc1(Ndeg, &scales);
1026:   initscale = 1.;
1027:   if (dim > 1) {
1028:     PetscDTBinomial(dim, 2, &scaleexp);
1029:     initscale = PetscPowReal(2., scaleexp * 0.5);
1030:   }
1031:   for (degidx = 0; degidx < Ndeg; degidx++) {
1032:     PetscInt  e, i;
1033:     PetscInt  m1idx = -1, m2idx = -1;
1034:     PetscInt  n;
1035:     PetscInt  degsum;
1036:     PetscReal alpha;
1037:     PetscReal cnm1, cnm1x, cnm2;
1038:     PetscReal norm;

1040:     PetscDTIndexToGradedOrder(dim, degidx, degtup);
1041:     for (d = dim - 1; d >= 0; d--)
1042:       if (degtup[d]) break;
1043:     if (d < 0) { /* constant is 1 everywhere, all derivatives are zero */
1044:       scales[degidx] = initscale;
1045:       for (e = 0; e < dim; e++) {
1046:         PetscDTJacobiNorm(e, 0., 0, &norm);
1047:         scales[degidx] /= norm;
1048:       }
1049:       for (i = 0; i < npoints; i++) p[degidx * Nk * npoints + i] = 1.;
1050:       for (i = 0; i < (Nk - 1) * npoints; i++) p[(degidx * Nk + 1) * npoints + i] = 0.;
1051:       continue;
1052:     }
1053:     n = degtup[d];
1054:     degtup[d]--;
1055:     PetscDTGradedOrderToIndex(dim, degtup, &m1idx);
1056:     if (degtup[d] > 0) {
1057:       degtup[d]--;
1058:       PetscDTGradedOrderToIndex(dim, degtup, &m2idx);
1059:       degtup[d]++;
1060:     }
1061:     degtup[d]++;
1062:     for (e = 0, degsum = 0; e < d; e++) degsum += degtup[e];
1063:     alpha = 2 * degsum + d;
1064:     PetscDTJacobiRecurrence_Internal(n, alpha, 0., cnm1, cnm1x, cnm2);

1066:     scales[degidx] = initscale;
1067:     for (e = 0, degsum = 0; e < dim; e++) {
1068:       PetscInt  f;
1069:       PetscReal ealpha;
1070:       PetscReal enorm;

1072:       ealpha = 2 * degsum + e;
1073:       for (f = 0; f < degsum; f++) scales[degidx] *= 2.;
1074:       PetscDTJacobiNorm(ealpha, 0., degtup[e], &enorm);
1075:       scales[degidx] /= enorm;
1076:       degsum += degtup[e];
1077:     }

1079:     for (pt = 0; pt < npoints; pt++) {
1080:       /* compute the multipliers */
1081:       PetscReal thetanm1, thetanm1x, thetanm2;

1083:       thetanm1x = dim - (d + 1) + 2. * points[pt * dim + d];
1084:       for (e = d + 1; e < dim; e++) thetanm1x += points[pt * dim + e];
1085:       thetanm1x *= 0.5;
1086:       thetanm1 = (2. - (dim - (d + 1)));
1087:       for (e = d + 1; e < dim; e++) thetanm1 -= points[pt * dim + e];
1088:       thetanm1 *= 0.5;
1089:       thetanm2 = thetanm1 * thetanm1;

1091:       for (kidx = 0; kidx < Nk; kidx++) {
1092:         PetscInt f;

1094:         PetscDTIndexToGradedOrder(dim, kidx, ktup);
1095:         /* first sum in the same derivative terms */
1096:         p[(degidx * Nk + kidx) * npoints + pt] = (cnm1 * thetanm1 + cnm1x * thetanm1x) * p[(m1idx * Nk + kidx) * npoints + pt];
1097:         if (m2idx >= 0) p[(degidx * Nk + kidx) * npoints + pt] -= cnm2 * thetanm2 * p[(m2idx * Nk + kidx) * npoints + pt];

1099:         for (f = d; f < dim; f++) {
1100:           PetscInt km1idx, mplty = ktup[f];

1102:           if (!mplty) continue;
1103:           ktup[f]--;
1104:           PetscDTGradedOrderToIndex(dim, ktup, &km1idx);

1106:           /* the derivative of  cnm1x * thetanm1x  wrt x variable f is 0.5 * cnm1x if f > d otherwise it is cnm1x */
1107:           /* the derivative of  cnm1  * thetanm1   wrt x variable f is 0 if f == d, otherwise it is -0.5 * cnm1 */
1108:           /* the derivative of -cnm2  * thetanm2   wrt x variable f is 0 if f == d, otherwise it is cnm2 * thetanm1 */
1109:           if (f > d) {
1110:             PetscInt f2;

1112:             p[(degidx * Nk + kidx) * npoints + pt] += mplty * 0.5 * (cnm1x - cnm1) * p[(m1idx * Nk + km1idx) * npoints + pt];
1113:             if (m2idx >= 0) {
1114:               p[(degidx * Nk + kidx) * npoints + pt] += mplty * cnm2 * thetanm1 * p[(m2idx * Nk + km1idx) * npoints + pt];
1115:               /* second derivatives of -cnm2  * thetanm2   wrt x variable f,f2 is like - 0.5 * cnm2 */
1116:               for (f2 = f; f2 < dim; f2++) {
1117:                 PetscInt km2idx, mplty2 = ktup[f2];
1118:                 PetscInt factor;

1120:                 if (!mplty2) continue;
1121:                 ktup[f2]--;
1122:                 PetscDTGradedOrderToIndex(dim, ktup, &km2idx);

1124:                 factor = mplty * mplty2;
1125:                 if (f == f2) factor /= 2;
1126:                 p[(degidx * Nk + kidx) * npoints + pt] -= 0.5 * factor * cnm2 * p[(m2idx * Nk + km2idx) * npoints + pt];
1127:                 ktup[f2]++;
1128:               }
1129:             }
1130:           } else {
1131:             p[(degidx * Nk + kidx) * npoints + pt] += mplty * cnm1x * p[(m1idx * Nk + km1idx) * npoints + pt];
1132:           }
1133:           ktup[f]++;
1134:         }
1135:       }
1136:     }
1137:   }
1138:   for (degidx = 0; degidx < Ndeg; degidx++) {
1139:     PetscReal scale = scales[degidx];
1140:     PetscInt  i;

1142:     for (i = 0; i < Nk * npoints; i++) p[degidx * Nk * npoints + i] *= scale;
1143:   }
1144:   PetscFree(scales);
1145:   PetscFree2(degtup, ktup);
1146:   return 0;
1147: }

1149: /*@
1150:   PetscDTPTrimmedSize - The size of the trimmed polynomial space of k-forms with a given degree and form degree,
1151:   which can be evaluated in `PetscDTPTrimmedEvalJet()`.

1153:   Input Parameters:
1154: + dim - the number of variables in the multivariate polynomials
1155: . degree - the degree (sum of degrees on the variables in a monomial) of the trimmed polynomial space.
1156: - formDegree - the degree of the form

1158:   Output Parameters:
1159: - size - The number ((dim + degree) choose (dim + formDegree)) x ((degree + formDegree - 1) choose (formDegree))

1161:   Level: advanced

1163: .seealso: `PetscDTPTrimmedEvalJet()`
1164: @*/
1165: PetscErrorCode PetscDTPTrimmedSize(PetscInt dim, PetscInt degree, PetscInt formDegree, PetscInt *size)
1166: {
1167:   PetscInt Nrk, Nbpt; // number of trimmed polynomials

1169:   formDegree = PetscAbsInt(formDegree);
1170:   PetscDTBinomialInt(degree + dim, degree + formDegree, &Nbpt);
1171:   PetscDTBinomialInt(degree + formDegree - 1, formDegree, &Nrk);
1172:   Nbpt *= Nrk;
1173:   *size = Nbpt;
1174:   return 0;
1175: }

1177: /* there was a reference implementation based on section 4.4 of Arnold, Falk & Winther (acta numerica, 2006), but it
1178:  * was inferior to this implementation */
1179: static PetscErrorCode PetscDTPTrimmedEvalJet_Internal(PetscInt dim, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt formDegree, PetscInt jetDegree, PetscReal p[])
1180: {
1181:   PetscInt  formDegreeOrig = formDegree;
1182:   PetscBool formNegative   = (formDegreeOrig < 0) ? PETSC_TRUE : PETSC_FALSE;

1184:   formDegree = PetscAbsInt(formDegreeOrig);
1185:   if (formDegree == 0) {
1186:     PetscDTPKDEvalJet(dim, npoints, points, degree, jetDegree, p);
1187:     return 0;
1188:   }
1189:   if (formDegree == dim) {
1190:     PetscDTPKDEvalJet(dim, npoints, points, degree - 1, jetDegree, p);
1191:     return 0;
1192:   }
1193:   PetscInt Nbpt;
1194:   PetscDTPTrimmedSize(dim, degree, formDegree, &Nbpt);
1195:   PetscInt Nf;
1196:   PetscDTBinomialInt(dim, formDegree, &Nf);
1197:   PetscInt Nk;
1198:   PetscDTBinomialInt(dim + jetDegree, dim, &Nk);
1199:   PetscArrayzero(p, Nbpt * Nf * Nk * npoints);

1201:   PetscInt Nbpm1; // number of scalar polynomials up to degree - 1;
1202:   PetscDTBinomialInt(dim + degree - 1, dim, &Nbpm1);
1203:   PetscReal *p_scalar;
1204:   PetscMalloc1(Nbpm1 * Nk * npoints, &p_scalar);
1205:   PetscDTPKDEvalJet(dim, npoints, points, degree - 1, jetDegree, p_scalar);
1206:   PetscInt total = 0;
1207:   // First add the full polynomials up to degree - 1 into the basis: take the scalar
1208:   // and copy one for each form component
1209:   for (PetscInt i = 0; i < Nbpm1; i++) {
1210:     const PetscReal *src = &p_scalar[i * Nk * npoints];
1211:     for (PetscInt f = 0; f < Nf; f++) {
1212:       PetscReal *dest = &p[(total++ * Nf + f) * Nk * npoints];
1213:       PetscArraycpy(dest, src, Nk * npoints);
1214:     }
1215:   }
1216:   PetscInt *form_atoms;
1217:   PetscMalloc1(formDegree + 1, &form_atoms);
1218:   // construct the interior product pattern
1219:   PetscInt(*pattern)[3];
1220:   PetscInt Nf1; // number of formDegree + 1 forms
1221:   PetscDTBinomialInt(dim, formDegree + 1, &Nf1);
1222:   PetscInt nnz = Nf1 * (formDegree + 1);
1223:   PetscMalloc1(Nf1 * (formDegree + 1), &pattern);
1224:   PetscDTAltVInteriorPattern(dim, formDegree + 1, pattern);
1225:   PetscReal centroid = (1. - dim) / (dim + 1.);
1226:   PetscInt *deriv;
1227:   PetscMalloc1(dim, &deriv);
1228:   for (PetscInt d = dim; d >= formDegree + 1; d--) {
1229:     PetscInt Nfd1; // number of formDegree + 1 forms in dimension d that include dx_0
1230:                    // (equal to the number of formDegree forms in dimension d-1)
1231:     PetscDTBinomialInt(d - 1, formDegree, &Nfd1);
1232:     // The number of homogeneous (degree-1) scalar polynomials in d variables
1233:     PetscInt Nh;
1234:     PetscDTBinomialInt(d - 1 + degree - 1, d - 1, &Nh);
1235:     const PetscReal *h_scalar = &p_scalar[(Nbpm1 - Nh) * Nk * npoints];
1236:     for (PetscInt b = 0; b < Nh; b++) {
1237:       const PetscReal *h_s = &h_scalar[b * Nk * npoints];
1238:       for (PetscInt f = 0; f < Nfd1; f++) {
1239:         // construct all formDegree+1 forms that start with dx_(dim - d) /\ ...
1240:         form_atoms[0] = dim - d;
1241:         PetscDTEnumSubset(d - 1, formDegree, f, &form_atoms[1]);
1242:         for (PetscInt i = 0; i < formDegree; i++) form_atoms[1 + i] += form_atoms[0] + 1;
1243:         PetscInt f_ind; // index of the resulting form
1244:         PetscDTSubsetIndex(dim, formDegree + 1, form_atoms, &f_ind);
1245:         PetscReal *p_f = &p[total++ * Nf * Nk * npoints];
1246:         for (PetscInt nz = 0; nz < nnz; nz++) {
1247:           PetscInt  i     = pattern[nz][0]; // formDegree component
1248:           PetscInt  j     = pattern[nz][1]; // (formDegree + 1) component
1249:           PetscInt  v     = pattern[nz][2]; // coordinate component
1250:           PetscReal scale = v < 0 ? -1. : 1.;

1252:           i     = formNegative ? (Nf - 1 - i) : i;
1253:           scale = (formNegative && (i & 1)) ? -scale : scale;
1254:           v     = v < 0 ? -(v + 1) : v;
1255:           if (j != f_ind) continue;
1256:           PetscReal *p_i = &p_f[i * Nk * npoints];
1257:           for (PetscInt jet = 0; jet < Nk; jet++) {
1258:             const PetscReal *h_jet = &h_s[jet * npoints];
1259:             PetscReal       *p_jet = &p_i[jet * npoints];

1261:             for (PetscInt pt = 0; pt < npoints; pt++) p_jet[pt] += scale * h_jet[pt] * (points[pt * dim + v] - centroid);
1262:             PetscDTIndexToGradedOrder(dim, jet, deriv);
1263:             deriv[v]++;
1264:             PetscReal mult = deriv[v];
1265:             PetscInt  l;
1266:             PetscDTGradedOrderToIndex(dim, deriv, &l);
1267:             if (l >= Nk) continue;
1268:             p_jet = &p_i[l * npoints];
1269:             for (PetscInt pt = 0; pt < npoints; pt++) p_jet[pt] += scale * mult * h_jet[pt];
1270:             deriv[v]--;
1271:           }
1272:         }
1273:       }
1274:     }
1275:   }
1277:   PetscFree(deriv);
1278:   PetscFree(pattern);
1279:   PetscFree(form_atoms);
1280:   PetscFree(p_scalar);
1281:   return 0;
1282: }

1284: /*@
1285:   PetscDTPTrimmedEvalJet - Evaluate the jet (function and derivatives) of a basis of the trimmed polynomial k-forms up to
1286:   a given degree.

1288:   Input Parameters:
1289: + dim - the number of variables in the multivariate polynomials
1290: . npoints - the number of points to evaluate the polynomials at
1291: . points - [npoints x dim] array of point coordinates
1292: . degree - the degree (sum of degrees on the variables in a monomial) of the trimmed polynomial space to evaluate.
1293:            There are ((dim + degree) choose (dim + formDegree)) x ((degree + formDegree - 1) choose (formDegree)) polynomials in this space.
1294:            (You can use `PetscDTPTrimmedSize()` to compute this size.)
1295: . formDegree - the degree of the form
1296: - jetDegree - the maximum order partial derivative to evaluate in the jet.  There are ((dim + jetDegree) choose dim) partial derivatives
1297:               in the jet.  Choosing jetDegree = 0 means to evaluate just the function and no derivatives

1299:   Output Parameters:
1300: - p - an array containing the evaluations of the PKD polynomials' jets on the points.  The size is
1301:       `PetscDTPTrimmedSize()` x ((dim + formDegree) choose dim) x ((dim + k) choose dim) x npoints,
1302:       which also describes the order of the dimensions of this
1303:       four-dimensional array:
1304:         the first (slowest varying) dimension is basis function index;
1305:         the second dimension is component of the form;
1306:         the third dimension is jet index;
1307:         the fourth (fastest varying) dimension is the index of the evaluation point.

1309:   Level: advanced

1311:   Notes:
1312:   The ordering of the basis functions is not graded, so the basis functions are not nested by degree like `PetscDTPKDEvalJet()`.
1313:   The basis functions are not an L2-orthonormal basis on any particular domain.

1315:   The implementation is based on the description of the trimmed polynomials up to degree r as
1316:   the direct sum of polynomials up to degree (r-1) and the Koszul differential applied to
1317:   homogeneous polynomials of degree (r-1).

1319: .seealso: `PetscDTPKDEvalJet()`, `PetscDTPTrimmedSize()`
1320: @*/
1321: PetscErrorCode PetscDTPTrimmedEvalJet(PetscInt dim, PetscInt npoints, const PetscReal points[], PetscInt degree, PetscInt formDegree, PetscInt jetDegree, PetscReal p[])
1322: {
1323:   PetscDTPTrimmedEvalJet_Internal(dim, npoints, points, degree, formDegree, jetDegree, p);
1324:   return 0;
1325: }

1327: /* solve the symmetric tridiagonal eigenvalue system, writing the eigenvalues into eigs and the eigenvectors into V
1328:  * with lds n; diag and subdiag are overwritten */
1329: static PetscErrorCode PetscDTSymmetricTridiagonalEigensolve(PetscInt n, PetscReal diag[], PetscReal subdiag[], PetscReal eigs[], PetscScalar V[])
1330: {
1331:   char          jobz   = 'V'; /* eigenvalues and eigenvectors */
1332:   char          range  = 'A'; /* all eigenvalues will be found */
1333:   PetscReal     VL     = 0.;  /* ignored because range is 'A' */
1334:   PetscReal     VU     = 0.;  /* ignored because range is 'A' */
1335:   PetscBLASInt  IL     = 0;   /* ignored because range is 'A' */
1336:   PetscBLASInt  IU     = 0;   /* ignored because range is 'A' */
1337:   PetscReal     abstol = 0.;  /* unused */
1338:   PetscBLASInt  bn, bm, ldz;  /* bm will equal bn on exit */
1339:   PetscBLASInt *isuppz;
1340:   PetscBLASInt  lwork, liwork;
1341:   PetscReal     workquery;
1342:   PetscBLASInt  iworkquery;
1343:   PetscBLASInt *iwork;
1344:   PetscBLASInt  info;
1345:   PetscReal    *work = NULL;

1347: #if !defined(PETSCDTGAUSSIANQUADRATURE_EIG)
1348:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "A LAPACK symmetric tridiagonal eigensolver could not be found");
1349: #endif
1350:   PetscBLASIntCast(n, &bn);
1351:   PetscBLASIntCast(n, &ldz);
1352: #if !defined(PETSC_MISSING_LAPACK_STEGR)
1353:   PetscMalloc1(2 * n, &isuppz);
1354:   lwork  = -1;
1355:   liwork = -1;
1356:   PetscCallBLAS("LAPACKstegr", LAPACKstegr_(&jobz, &range, &bn, diag, subdiag, &VL, &VU, &IL, &IU, &abstol, &bm, eigs, V, &ldz, isuppz, &workquery, &lwork, &iworkquery, &liwork, &info));
1358:   lwork  = (PetscBLASInt)workquery;
1359:   liwork = (PetscBLASInt)iworkquery;
1360:   PetscMalloc2(lwork, &work, liwork, &iwork);
1361:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1362:   PetscCallBLAS("LAPACKstegr", LAPACKstegr_(&jobz, &range, &bn, diag, subdiag, &VL, &VU, &IL, &IU, &abstol, &bm, eigs, V, &ldz, isuppz, work, &lwork, iwork, &liwork, &info));
1363:   PetscFPTrapPop();
1365:   PetscFree2(work, iwork);
1366:   PetscFree(isuppz);
1367: #elif !defined(PETSC_MISSING_LAPACK_STEQR)
1368:   jobz = 'I'; /* Compute eigenvalues and eigenvectors of the
1369:                  tridiagonal matrix.  Z is initialized to the identity
1370:                  matrix. */
1371:   PetscMalloc1(PetscMax(1, 2 * n - 2), &work);
1372:   PetscCallBLAS("LAPACKsteqr", LAPACKsteqr_("I", &bn, diag, subdiag, V, &ldz, work, &info));
1373:   PetscFPTrapPop();
1375:   PetscFree(work);
1376:   PetscArraycpy(eigs, diag, n);
1377: #endif
1378:   return 0;
1379: }

1381: /* Formula for the weights at the endpoints (-1 and 1) of Gauss-Lobatto-Jacobi
1382:  * quadrature rules on the interval [-1, 1] */
1383: static PetscErrorCode PetscDTGaussLobattoJacobiEndweights_Internal(PetscInt n, PetscReal alpha, PetscReal beta, PetscReal *leftw, PetscReal *rightw)
1384: {
1385:   PetscReal twoab1;
1386:   PetscInt  m = n - 2;
1387:   PetscReal a = alpha + 1.;
1388:   PetscReal b = beta + 1.;
1389:   PetscReal gra, grb;

1391:   twoab1 = PetscPowReal(2., a + b - 1.);
1392: #if defined(PETSC_HAVE_LGAMMA)
1393:   grb = PetscExpReal(2. * PetscLGamma(b + 1.) + PetscLGamma(m + 1.) + PetscLGamma(m + a + 1.) - (PetscLGamma(m + b + 1) + PetscLGamma(m + a + b + 1.)));
1394:   gra = PetscExpReal(2. * PetscLGamma(a + 1.) + PetscLGamma(m + 1.) + PetscLGamma(m + b + 1.) - (PetscLGamma(m + a + 1) + PetscLGamma(m + a + b + 1.)));
1395: #else
1396:   {
1397:     PetscInt alphai = (PetscInt)alpha;
1398:     PetscInt betai  = (PetscInt)beta;

1400:     if ((PetscReal)alphai == alpha && (PetscReal)betai == beta) {
1401:       PetscReal binom1, binom2;

1403:       PetscDTBinomial(m + b, b, &binom1);
1404:       PetscDTBinomial(m + a + b, b, &binom2);
1405:       grb = 1. / (binom1 * binom2);
1406:       PetscDTBinomial(m + a, a, &binom1);
1407:       PetscDTBinomial(m + a + b, a, &binom2);
1408:       gra = 1. / (binom1 * binom2);
1409:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "lgamma() - math routine is unavailable.");
1410:   }
1411: #endif
1412:   *leftw  = twoab1 * grb / b;
1413:   *rightw = twoab1 * gra / a;
1414:   return 0;
1415: }

1417: /* Evaluates the nth jacobi polynomial with weight parameters a,b at a point x.
1418:    Recurrence relations implemented from the pseudocode given in Karniadakis and Sherwin, Appendix B */
1419: static inline PetscErrorCode PetscDTComputeJacobi(PetscReal a, PetscReal b, PetscInt n, PetscReal x, PetscReal *P)
1420: {
1421:   PetscReal pn1, pn2;
1422:   PetscReal cnm1, cnm1x, cnm2;
1423:   PetscInt  k;

1425:   if (!n) {
1426:     *P = 1.0;
1427:     return 0;
1428:   }
1429:   PetscDTJacobiRecurrence_Internal(1, a, b, cnm1, cnm1x, cnm2);
1430:   pn2 = 1.;
1431:   pn1 = cnm1 + cnm1x * x;
1432:   if (n == 1) {
1433:     *P = pn1;
1434:     return 0;
1435:   }
1436:   *P = 0.0;
1437:   for (k = 2; k < n + 1; ++k) {
1438:     PetscDTJacobiRecurrence_Internal(k, a, b, cnm1, cnm1x, cnm2);

1440:     *P  = (cnm1 + cnm1x * x) * pn1 - cnm2 * pn2;
1441:     pn2 = pn1;
1442:     pn1 = *P;
1443:   }
1444:   return 0;
1445: }

1447: /* Evaluates the first derivative of P_{n}^{a,b} at a point x. */
1448: static inline PetscErrorCode PetscDTComputeJacobiDerivative(PetscReal a, PetscReal b, PetscInt n, PetscReal x, PetscInt k, PetscReal *P)
1449: {
1450:   PetscReal nP;
1451:   PetscInt  i;

1453:   *P = 0.0;
1454:   if (k > n) return 0;
1455:   PetscDTComputeJacobi(a + k, b + k, n - k, x, &nP);
1456:   for (i = 0; i < k; i++) nP *= (a + b + n + 1. + i) * 0.5;
1457:   *P = nP;
1458:   return 0;
1459: }

1461: static PetscErrorCode PetscDTGaussJacobiQuadrature_Newton_Internal(PetscInt npoints, PetscReal a, PetscReal b, PetscReal x[], PetscReal w[])
1462: {
1463:   PetscInt  maxIter = 100;
1464:   PetscReal eps     = PetscExpReal(0.75 * PetscLogReal(PETSC_MACHINE_EPSILON));
1465:   PetscReal a1, a6, gf;
1466:   PetscInt  k;


1469:   a1 = PetscPowReal(2.0, a + b + 1);
1470: #if defined(PETSC_HAVE_LGAMMA)
1471:   {
1472:     PetscReal a2, a3, a4, a5;
1473:     a2 = PetscLGamma(a + npoints + 1);
1474:     a3 = PetscLGamma(b + npoints + 1);
1475:     a4 = PetscLGamma(a + b + npoints + 1);
1476:     a5 = PetscLGamma(npoints + 1);
1477:     gf = PetscExpReal(a2 + a3 - (a4 + a5));
1478:   }
1479: #else
1480:   {
1481:     PetscInt ia, ib;

1483:     ia = (PetscInt)a;
1484:     ib = (PetscInt)b;
1485:     gf = 1.;
1486:     if (ia == a && ia >= 0) { /* compute ratio of rising factorals wrt a */
1487:       for (k = 0; k < ia; k++) gf *= (npoints + 1. + k) / (npoints + b + 1. + k);
1488:     } else if (b == b && ib >= 0) { /* compute ratio of rising factorials wrt b */
1489:       for (k = 0; k < ib; k++) gf *= (npoints + 1. + k) / (npoints + a + 1. + k);
1490:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "lgamma() - math routine is unavailable.");
1491:   }
1492: #endif

1494:   a6 = a1 * gf;
1495:   /* Computes the m roots of P_{m}^{a,b} on [-1,1] by Newton's method with Chebyshev points as initial guesses.
1496:    Algorithm implemented from the pseudocode given by Karniadakis and Sherwin and Python in FIAT */
1497:   for (k = 0; k < npoints; ++k) {
1498:     PetscReal r = PetscCosReal(PETSC_PI * (1. - (4. * k + 3. + 2. * b) / (4. * npoints + 2. * (a + b + 1.)))), dP;
1499:     PetscInt  j;

1501:     if (k > 0) r = 0.5 * (r + x[k - 1]);
1502:     for (j = 0; j < maxIter; ++j) {
1503:       PetscReal s = 0.0, delta, f, fp;
1504:       PetscInt  i;

1506:       for (i = 0; i < k; ++i) s = s + 1.0 / (r - x[i]);
1507:       PetscDTComputeJacobi(a, b, npoints, r, &f);
1508:       PetscDTComputeJacobiDerivative(a, b, npoints, r, 1, &fp);
1509:       delta = f / (fp - f * s);
1510:       r     = r - delta;
1511:       if (PetscAbsReal(delta) < eps) break;
1512:     }
1513:     x[k] = r;
1514:     PetscDTComputeJacobiDerivative(a, b, npoints, x[k], 1, &dP);
1515:     w[k] = a6 / (1.0 - PetscSqr(x[k])) / PetscSqr(dP);
1516:   }
1517:   return 0;
1518: }

1520: /* Compute the diagonals of the Jacobi matrix used in Golub & Welsch algorithms for Gauss-Jacobi
1521:  * quadrature weight calculations on [-1,1] for exponents (1. + x)^a (1.-x)^b */
1522: static PetscErrorCode PetscDTJacobiMatrix_Internal(PetscInt nPoints, PetscReal a, PetscReal b, PetscReal *d, PetscReal *s)
1523: {
1524:   PetscInt i;

1526:   for (i = 0; i < nPoints; i++) {
1527:     PetscReal A, B, C;

1529:     PetscDTJacobiRecurrence_Internal(i + 1, a, b, A, B, C);
1530:     d[i] = -A / B;
1531:     if (i) s[i - 1] *= C / B;
1532:     if (i < nPoints - 1) s[i] = 1. / B;
1533:   }
1534:   return 0;
1535: }

1537: static PetscErrorCode PetscDTGaussJacobiQuadrature_GolubWelsch_Internal(PetscInt npoints, PetscReal a, PetscReal b, PetscReal *x, PetscReal *w)
1538: {
1539:   PetscReal mu0;
1540:   PetscReal ga, gb, gab;
1541:   PetscInt  i;

1543:   PetscCitationsRegister(GolubWelschCitation, &GolubWelschCite);

1545: #if defined(PETSC_HAVE_TGAMMA)
1546:   ga  = PetscTGamma(a + 1);
1547:   gb  = PetscTGamma(b + 1);
1548:   gab = PetscTGamma(a + b + 2);
1549: #else
1550:   {
1551:     PetscInt ia, ib;

1553:     ia = (PetscInt)a;
1554:     ib = (PetscInt)b;
1555:     if (ia == a && ib == b && ia + 1 > 0 && ib + 1 > 0 && ia + ib + 2 > 0) { /* All gamma(x) terms are (x-1)! terms */
1556:       PetscDTFactorial(ia, &ga);
1557:       PetscDTFactorial(ib, &gb);
1558:       PetscDTFactorial(ia + ib + 1, &gb);
1559:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "tgamma() - math routine is unavailable.");
1560:   }
1561: #endif
1562:   mu0 = PetscPowReal(2., a + b + 1.) * ga * gb / gab;

1564: #if defined(PETSCDTGAUSSIANQUADRATURE_EIG)
1565:   {
1566:     PetscReal   *diag, *subdiag;
1567:     PetscScalar *V;

1569:     PetscMalloc2(npoints, &diag, npoints, &subdiag);
1570:     PetscMalloc1(npoints * npoints, &V);
1571:     PetscDTJacobiMatrix_Internal(npoints, a, b, diag, subdiag);
1572:     for (i = 0; i < npoints - 1; i++) subdiag[i] = PetscSqrtReal(subdiag[i]);
1573:     PetscDTSymmetricTridiagonalEigensolve(npoints, diag, subdiag, x, V);
1574:     for (i = 0; i < npoints; i++) w[i] = PetscSqr(PetscRealPart(V[i * npoints])) * mu0;
1575:     PetscFree(V);
1576:     PetscFree2(diag, subdiag);
1577:   }
1578: #else
1579:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "A LAPACK symmetric tridiagonal eigensolver could not be found");
1580: #endif
1581:   { /* As of March 2, 2020, The Sun Performance Library breaks the LAPACK contract for xstegr and xsteqr: the
1582:        eigenvalues are not guaranteed to be in ascending order.  So we heave a passive aggressive sigh and check that
1583:        the eigenvalues are sorted */
1584:     PetscBool sorted;

1586:     PetscSortedReal(npoints, x, &sorted);
1587:     if (!sorted) {
1588:       PetscInt  *order, i;
1589:       PetscReal *tmp;

1591:       PetscMalloc2(npoints, &order, npoints, &tmp);
1592:       for (i = 0; i < npoints; i++) order[i] = i;
1593:       PetscSortRealWithPermutation(npoints, x, order);
1594:       PetscArraycpy(tmp, x, npoints);
1595:       for (i = 0; i < npoints; i++) x[i] = tmp[order[i]];
1596:       PetscArraycpy(tmp, w, npoints);
1597:       for (i = 0; i < npoints; i++) w[i] = tmp[order[i]];
1598:       PetscFree2(order, tmp);
1599:     }
1600:   }
1601:   return 0;
1602: }

1604: static PetscErrorCode PetscDTGaussJacobiQuadrature_Internal(PetscInt npoints, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[], PetscBool newton)
1605: {
1607:   /* If asking for a 1D Lobatto point, just return the non-Lobatto 1D point */

1611:   if (newton) PetscDTGaussJacobiQuadrature_Newton_Internal(npoints, alpha, beta, x, w);
1612:   else PetscDTGaussJacobiQuadrature_GolubWelsch_Internal(npoints, alpha, beta, x, w);
1613:   if (alpha == beta) { /* symmetrize */
1614:     PetscInt i;
1615:     for (i = 0; i < (npoints + 1) / 2; i++) {
1616:       PetscInt  j  = npoints - 1 - i;
1617:       PetscReal xi = x[i];
1618:       PetscReal xj = x[j];
1619:       PetscReal wi = w[i];
1620:       PetscReal wj = w[j];

1622:       x[i] = (xi - xj) / 2.;
1623:       x[j] = (xj - xi) / 2.;
1624:       w[i] = w[j] = (wi + wj) / 2.;
1625:     }
1626:   }
1627:   return 0;
1628: }

1630: /*@
1631:   PetscDTGaussJacobiQuadrature - quadrature for the interval [a, b] with the weight function
1632:   $(x-a)^\alpha (x-b)^\beta$.

1634:   Not collective

1636:   Input Parameters:
1637: + npoints - the number of points in the quadrature rule
1638: . a - the left endpoint of the interval
1639: . b - the right endpoint of the interval
1640: . alpha - the left exponent
1641: - beta - the right exponent

1643:   Output Parameters:
1644: + x - array of length npoints, the locations of the quadrature points
1645: - w - array of length npoints, the weights of the quadrature points

1647:   Level: intermediate

1649:   Note:
1650:   This quadrature rule is exact for polynomials up to degree 2*npoints - 1.

1652: .seealso: `PetscDTGaussQuadrature()`
1653: @*/
1654: PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt npoints, PetscReal a, PetscReal b, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[])
1655: {
1656:   PetscInt i;

1658:   PetscDTGaussJacobiQuadrature_Internal(npoints, alpha, beta, x, w, PetscDTGaussQuadratureNewton_Internal);
1659:   if (a != -1. || b != 1.) { /* shift */
1660:     for (i = 0; i < npoints; i++) {
1661:       x[i] = (x[i] + 1.) * ((b - a) / 2.) + a;
1662:       w[i] *= (b - a) / 2.;
1663:     }
1664:   }
1665:   return 0;
1666: }

1668: static PetscErrorCode PetscDTGaussLobattoJacobiQuadrature_Internal(PetscInt npoints, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[], PetscBool newton)
1669: {
1670:   PetscInt i;

1673:   /* If asking for a 1D Lobatto point, just return the non-Lobatto 1D point */

1677:   x[0]           = -1.;
1678:   x[npoints - 1] = 1.;
1679:   if (npoints > 2) PetscDTGaussJacobiQuadrature_Internal(npoints - 2, alpha + 1., beta + 1., &x[1], &w[1], newton);
1680:   for (i = 1; i < npoints - 1; i++) w[i] /= (1. - x[i] * x[i]);
1681:   PetscDTGaussLobattoJacobiEndweights_Internal(npoints, alpha, beta, &w[0], &w[npoints - 1]);
1682:   return 0;
1683: }

1685: /*@
1686:   PetscDTGaussLobattoJacobiQuadrature - quadrature for the interval [a, b] with the weight function
1687:   $(x-a)^\alpha (x-b)^\beta$, with endpoints a and b included as quadrature points.

1689:   Not collective

1691:   Input Parameters:
1692: + npoints - the number of points in the quadrature rule
1693: . a - the left endpoint of the interval
1694: . b - the right endpoint of the interval
1695: . alpha - the left exponent
1696: - beta - the right exponent

1698:   Output Parameters:
1699: + x - array of length npoints, the locations of the quadrature points
1700: - w - array of length npoints, the weights of the quadrature points

1702:   Level: intermediate

1704:   Note:
1705:   This quadrature rule is exact for polynomials up to degree 2*npoints - 3.

1707: .seealso: `PetscDTGaussJacobiQuadrature()`
1708: @*/
1709: PetscErrorCode PetscDTGaussLobattoJacobiQuadrature(PetscInt npoints, PetscReal a, PetscReal b, PetscReal alpha, PetscReal beta, PetscReal x[], PetscReal w[])
1710: {
1711:   PetscInt i;

1713:   PetscDTGaussLobattoJacobiQuadrature_Internal(npoints, alpha, beta, x, w, PetscDTGaussQuadratureNewton_Internal);
1714:   if (a != -1. || b != 1.) { /* shift */
1715:     for (i = 0; i < npoints; i++) {
1716:       x[i] = (x[i] + 1.) * ((b - a) / 2.) + a;
1717:       w[i] *= (b - a) / 2.;
1718:     }
1719:   }
1720:   return 0;
1721: }

1723: /*@
1724:    PetscDTGaussQuadrature - create Gauss-Legendre quadrature

1726:    Not Collective

1728:    Input Parameters:
1729: +  npoints - number of points
1730: .  a - left end of interval (often-1)
1731: -  b - right end of interval (often +1)

1733:    Output Parameters:
1734: +  x - quadrature points
1735: -  w - quadrature weights

1737:    Level: intermediate

1739:    References:
1740: .  * - Golub and Welsch, Calculation of Quadrature Rules, Math. Comp. 23(106), 1969.

1742: .seealso: `PetscDTLegendreEval()`, `PetscDTGaussJacobiQuadrature()`
1743: @*/
1744: PetscErrorCode PetscDTGaussQuadrature(PetscInt npoints, PetscReal a, PetscReal b, PetscReal *x, PetscReal *w)
1745: {
1746:   PetscInt i;

1748:   PetscDTGaussJacobiQuadrature_Internal(npoints, 0., 0., x, w, PetscDTGaussQuadratureNewton_Internal);
1749:   if (a != -1. || b != 1.) { /* shift */
1750:     for (i = 0; i < npoints; i++) {
1751:       x[i] = (x[i] + 1.) * ((b - a) / 2.) + a;
1752:       w[i] *= (b - a) / 2.;
1753:     }
1754:   }
1755:   return 0;
1756: }

1758: /*@C
1759:    PetscDTGaussLobattoLegendreQuadrature - creates a set of the locations and weights of the Gauss-Lobatto-Legendre
1760:                       nodes of a given size on the domain [-1,1]

1762:    Not Collective

1764:    Input Parameters:
1765: +  n - number of grid nodes
1766: -  type - `PETSCGAUSSLOBATTOLEGENDRE_VIA_LINEAR_ALGEBRA` or `PETSCGAUSSLOBATTOLEGENDRE_VIA_NEWTON`

1768:    Output Parameters:
1769: +  x - quadrature points
1770: -  w - quadrature weights

1772:    Level: intermediate

1774:    Notes:
1775:     For n > 30  the Newton approach computes duplicate (incorrect) values for some nodes because the initial guess is apparently not
1776:           close enough to the desired solution

1778:    These are useful for implementing spectral methods based on Gauss-Lobatto-Legendre (GLL) nodes

1780:    See  https://epubs.siam.org/doi/abs/10.1137/110855442  https://epubs.siam.org/doi/abs/10.1137/120889873 for better ways to compute GLL nodes

1782: .seealso: `PetscDTGaussQuadrature()`, `PetscGaussLobattoLegendreCreateType`

1784: @*/
1785: PetscErrorCode PetscDTGaussLobattoLegendreQuadrature(PetscInt npoints, PetscGaussLobattoLegendreCreateType type, PetscReal *x, PetscReal *w)
1786: {
1787:   PetscBool newton;

1790:   newton = (PetscBool)(type == PETSCGAUSSLOBATTOLEGENDRE_VIA_NEWTON);
1791:   PetscDTGaussLobattoJacobiQuadrature_Internal(npoints, 0., 0., x, w, newton);
1792:   return 0;
1793: }

1795: /*@
1796:   PetscDTGaussTensorQuadrature - creates a tensor-product Gauss quadrature

1798:   Not Collective

1800:   Input Parameters:
1801: + dim     - The spatial dimension
1802: . Nc      - The number of components
1803: . npoints - number of points in one dimension
1804: . a       - left end of interval (often-1)
1805: - b       - right end of interval (often +1)

1807:   Output Parameter:
1808: . q - A `PetscQuadrature` object

1810:   Level: intermediate

1812: .seealso: `PetscDTGaussQuadrature()`, `PetscDTLegendreEval()`
1813: @*/
1814: PetscErrorCode PetscDTGaussTensorQuadrature(PetscInt dim, PetscInt Nc, PetscInt npoints, PetscReal a, PetscReal b, PetscQuadrature *q)
1815: {
1816:   PetscInt   totpoints = dim > 1 ? dim > 2 ? npoints * PetscSqr(npoints) : PetscSqr(npoints) : npoints, i, j, k, c;
1817:   PetscReal *x, *w, *xw, *ww;

1819:   PetscMalloc1(totpoints * dim, &x);
1820:   PetscMalloc1(totpoints * Nc, &w);
1821:   /* Set up the Golub-Welsch system */
1822:   switch (dim) {
1823:   case 0:
1824:     PetscFree(x);
1825:     PetscFree(w);
1826:     PetscMalloc1(1, &x);
1827:     PetscMalloc1(Nc, &w);
1828:     x[0] = 0.0;
1829:     for (c = 0; c < Nc; ++c) w[c] = 1.0;
1830:     break;
1831:   case 1:
1832:     PetscMalloc1(npoints, &ww);
1833:     PetscDTGaussQuadrature(npoints, a, b, x, ww);
1834:     for (i = 0; i < npoints; ++i)
1835:       for (c = 0; c < Nc; ++c) w[i * Nc + c] = ww[i];
1836:     PetscFree(ww);
1837:     break;
1838:   case 2:
1839:     PetscMalloc2(npoints, &xw, npoints, &ww);
1840:     PetscDTGaussQuadrature(npoints, a, b, xw, ww);
1841:     for (i = 0; i < npoints; ++i) {
1842:       for (j = 0; j < npoints; ++j) {
1843:         x[(i * npoints + j) * dim + 0] = xw[i];
1844:         x[(i * npoints + j) * dim + 1] = xw[j];
1845:         for (c = 0; c < Nc; ++c) w[(i * npoints + j) * Nc + c] = ww[i] * ww[j];
1846:       }
1847:     }
1848:     PetscFree2(xw, ww);
1849:     break;
1850:   case 3:
1851:     PetscMalloc2(npoints, &xw, npoints, &ww);
1852:     PetscDTGaussQuadrature(npoints, a, b, xw, ww);
1853:     for (i = 0; i < npoints; ++i) {
1854:       for (j = 0; j < npoints; ++j) {
1855:         for (k = 0; k < npoints; ++k) {
1856:           x[((i * npoints + j) * npoints + k) * dim + 0] = xw[i];
1857:           x[((i * npoints + j) * npoints + k) * dim + 1] = xw[j];
1858:           x[((i * npoints + j) * npoints + k) * dim + 2] = xw[k];
1859:           for (c = 0; c < Nc; ++c) w[((i * npoints + j) * npoints + k) * Nc + c] = ww[i] * ww[j] * ww[k];
1860:         }
1861:       }
1862:     }
1863:     PetscFree2(xw, ww);
1864:     break;
1865:   default:
1866:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %" PetscInt_FMT, dim);
1867:   }
1868:   PetscQuadratureCreate(PETSC_COMM_SELF, q);
1869:   PetscQuadratureSetOrder(*q, 2 * npoints - 1);
1870:   PetscQuadratureSetData(*q, dim, Nc, totpoints, x, w);
1871:   PetscObjectChangeTypeName((PetscObject)*q, "GaussTensor");
1872:   return 0;
1873: }

1875: /*@
1876:   PetscDTStroudConicalQuadrature - create Stroud conical quadrature for a simplex

1878:   Not Collective

1880:   Input Parameters:
1881: + dim     - The simplex dimension
1882: . Nc      - The number of components
1883: . npoints - The number of points in one dimension
1884: . a       - left end of interval (often-1)
1885: - b       - right end of interval (often +1)

1887:   Output Parameter:
1888: . q - A PetscQuadrature object

1890:   Level: intermediate

1892:   Note:
1893:   For dim == 1, this is Gauss-Legendre quadrature

1895:   References:
1896: . * - Karniadakis and Sherwin.  FIAT

1898: .seealso: `PetscDTGaussTensorQuadrature()`, `PetscDTGaussQuadrature()`
1899: @*/
1900: PetscErrorCode PetscDTStroudConicalQuadrature(PetscInt dim, PetscInt Nc, PetscInt npoints, PetscReal a, PetscReal b, PetscQuadrature *q)
1901: {
1902:   PetscInt   totprev, totrem;
1903:   PetscInt   totpoints;
1904:   PetscReal *p1, *w1;
1905:   PetscReal *x, *w;
1906:   PetscInt   i, j, k, l, m, pt, c;

1909:   totpoints = 1;
1910:   for (i = 0, totpoints = 1; i < dim; i++) totpoints *= npoints;
1911:   PetscMalloc1(totpoints * dim, &x);
1912:   PetscMalloc1(totpoints * Nc, &w);
1913:   PetscMalloc2(npoints, &p1, npoints, &w1);
1914:   for (i = 0; i < totpoints * Nc; i++) w[i] = 1.;
1915:   for (i = 0, totprev = 1, totrem = totpoints / npoints; i < dim; i++) {
1916:     PetscReal mul;

1918:     mul = PetscPowReal(2., -i);
1919:     PetscDTGaussJacobiQuadrature(npoints, -1., 1., i, 0.0, p1, w1);
1920:     for (pt = 0, l = 0; l < totprev; l++) {
1921:       for (j = 0; j < npoints; j++) {
1922:         for (m = 0; m < totrem; m++, pt++) {
1923:           for (k = 0; k < i; k++) x[pt * dim + k] = (x[pt * dim + k] + 1.) * (1. - p1[j]) * 0.5 - 1.;
1924:           x[pt * dim + i] = p1[j];
1925:           for (c = 0; c < Nc; c++) w[pt * Nc + c] *= mul * w1[j];
1926:         }
1927:       }
1928:     }
1929:     totprev *= npoints;
1930:     totrem /= npoints;
1931:   }
1932:   PetscFree2(p1, w1);
1933:   PetscQuadratureCreate(PETSC_COMM_SELF, q);
1934:   PetscQuadratureSetOrder(*q, 2 * npoints - 1);
1935:   PetscQuadratureSetData(*q, dim, Nc, totpoints, x, w);
1936:   PetscObjectChangeTypeName((PetscObject)*q, "StroudConical");
1937:   return 0;
1938: }

1940: static PetscBool MinSymTriQuadCite       = PETSC_FALSE;
1941: const char       MinSymTriQuadCitation[] = "@article{WitherdenVincent2015,\n"
1942:                                            "  title = {On the identification of symmetric quadrature rules for finite element methods},\n"
1943:                                            "  journal = {Computers & Mathematics with Applications},\n"
1944:                                            "  volume = {69},\n"
1945:                                            "  number = {10},\n"
1946:                                            "  pages = {1232-1241},\n"
1947:                                            "  year = {2015},\n"
1948:                                            "  issn = {0898-1221},\n"
1949:                                            "  doi = {10.1016/j.camwa.2015.03.017},\n"
1950:                                            "  url = {https://www.sciencedirect.com/science/article/pii/S0898122115001224},\n"
1951:                                            "  author = {F.D. Witherden and P.E. Vincent},\n"
1952:                                            "}\n";

1954: #include "petscdttriquadrules.h"

1956: static PetscBool MinSymTetQuadCite       = PETSC_FALSE;
1957: const char       MinSymTetQuadCitation[] = "@article{JaskowiecSukumar2021\n"
1958:                                            "  author = {Jaskowiec, Jan and Sukumar, N.},\n"
1959:                                            "  title = {High-order symmetric cubature rules for tetrahedra and pyramids},\n"
1960:                                            "  journal = {International Journal for Numerical Methods in Engineering},\n"
1961:                                            "  volume = {122},\n"
1962:                                            "  number = {1},\n"
1963:                                            "  pages = {148-171},\n"
1964:                                            "  doi = {10.1002/nme.6528},\n"
1965:                                            "  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/nme.6528},\n"
1966:                                            "  eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/nme.6528},\n"
1967:                                            "  year = {2021}\n"
1968:                                            "}\n";

1970: #include "petscdttetquadrules.h"

1972: // https://en.wikipedia.org/wiki/Partition_(number_theory)
1973: static PetscErrorCode PetscDTPartitionNumber(PetscInt n, PetscInt *p)
1974: {
1975:   // sequence A000041 in the OEIS
1976:   const PetscInt partition[]   = {1, 1, 2, 3, 5, 7, 11, 15, 22, 30, 42, 56, 77, 101, 135, 176, 231, 297, 385, 490, 627, 792, 1002, 1255, 1575, 1958, 2436, 3010, 3718, 4565, 5604};
1977:   PetscInt       tabulated_max = PETSC_STATIC_ARRAY_LENGTH(partition) - 1;

1980:   // not implementing the pentagonal number recurrence, we don't need partition numbers for n that high
1982:   *p = partition[n];
1983:   return 0;
1984: }

1986: /*@
1987:   PetscDTSimplexQuadrature - Create a quadrature rule for a simplex that exactly integrates polynomials up to a given degree.

1989:   Not Collective

1991:   Input Parameters:
1992: + dim     - The spatial dimension of the simplex (1 = segment, 2 = triangle, 3 = tetrahedron)
1993: . degree  - The largest polynomial degree that is required to be integrated exactly
1994: - type    - left end of interval (often-1)

1996:   Output Parameter:
1997: . quad    - A `PetscQuadrature` object for integration over the biunit simplex
1998:             (defined by the bounds $x_i >= -1$ and $\sum_i x_i <= 2 - d$) that is exact for
1999:             polynomials up to the given degree

2001:   Level: intermediate

2003: .seealso: `PetscDTSimplexQuadratureType`, `PetscDTGaussQuadrature()`, `PetscDTStroudCononicalQuadrature()`, `PetscQuadrature`
2004: @*/
2005: PetscErrorCode PetscDTSimplexQuadrature(PetscInt dim, PetscInt degree, PetscDTSimplexQuadratureType type, PetscQuadrature *quad)
2006: {
2007:   PetscDTSimplexQuadratureType orig_type = type;

2011:   if (type == PETSCDTSIMPLEXQUAD_DEFAULT) type = PETSCDTSIMPLEXQUAD_MINSYM;
2012:   if (type == PETSCDTSIMPLEXQUAD_CONIC || dim < 2) {
2013:     PetscInt points_per_dim = (degree + 2) / 2; // ceil((degree + 1) / 2);
2014:     PetscDTStroudConicalQuadrature(dim, 1, points_per_dim, -1, 1, quad);
2015:   } else {
2016:     PetscInt          n    = dim + 1;
2017:     PetscInt          fact = 1;
2018:     PetscInt         *part, *perm;
2019:     PetscInt          p = 0;
2020:     PetscInt          max_degree;
2021:     const PetscInt   *nodes_per_type     = NULL;
2022:     const PetscInt   *all_num_full_nodes = NULL;
2023:     const PetscReal **weights_list       = NULL;
2024:     const PetscReal **compact_nodes_list = NULL;
2025:     const char       *citation           = NULL;
2026:     PetscBool        *cited              = NULL;

2028:     switch (dim) {
2029:     case 2:
2030:       cited              = &MinSymTriQuadCite;
2031:       citation           = MinSymTriQuadCitation;
2032:       max_degree         = PetscDTWVTriQuad_max_degree;
2033:       nodes_per_type     = PetscDTWVTriQuad_num_orbits;
2034:       all_num_full_nodes = PetscDTWVTriQuad_num_nodes;
2035:       weights_list       = PetscDTWVTriQuad_weights;
2036:       compact_nodes_list = PetscDTWVTriQuad_orbits;
2037:       break;
2038:     case 3:
2039:       cited              = &MinSymTetQuadCite;
2040:       citation           = MinSymTetQuadCitation;
2041:       max_degree         = PetscDTJSTetQuad_max_degree;
2042:       nodes_per_type     = PetscDTJSTetQuad_num_orbits;
2043:       all_num_full_nodes = PetscDTJSTetQuad_num_nodes;
2044:       weights_list       = PetscDTJSTetQuad_weights;
2045:       compact_nodes_list = PetscDTJSTetQuad_orbits;
2046:       break;
2047:     default:
2048:       max_degree = -1;
2049:       break;
2050:     }

2052:     if (degree > max_degree) {
2053:       if (orig_type == PETSCDTSIMPLEXQUAD_DEFAULT) {
2054:         // fall back to conic
2055:         PetscDTSimplexQuadrature(dim, degree, PETSCDTSIMPLEXQUAD_CONIC, quad);
2056:         return 0;
2057:       } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Minimal symmetric quadrature for dim %" PetscInt_FMT ", degree %" PetscInt_FMT " unsupported", dim, degree);
2058:     }

2060:     PetscCitationsRegister(citation, cited);

2062:     PetscDTPartitionNumber(n, &p);
2063:     for (PetscInt d = 2; d <= n; d++) fact *= d;

2065:     PetscInt         num_full_nodes      = all_num_full_nodes[degree];
2066:     const PetscReal *all_compact_nodes   = compact_nodes_list[degree];
2067:     const PetscReal *all_compact_weights = weights_list[degree];
2068:     nodes_per_type                       = &nodes_per_type[p * degree];

2070:     PetscReal      *points;
2071:     PetscReal      *counts;
2072:     PetscReal      *weights;
2073:     PetscReal      *bary_to_biunit; // row-major transformation of barycentric coordinate to biunit
2074:     PetscQuadrature q;

2076:     // compute the transformation
2077:     PetscMalloc1(n * dim, &bary_to_biunit);
2078:     for (PetscInt d = 0; d < dim; d++) {
2079:       for (PetscInt b = 0; b < n; b++) bary_to_biunit[d * n + b] = (d == b) ? 1.0 : -1.0;
2080:     }

2082:     PetscMalloc3(n, &part, n, &perm, n, &counts);
2083:     PetscCalloc1(num_full_nodes * dim, &points);
2084:     PetscMalloc1(num_full_nodes, &weights);

2086:     // (0, 0, ...) is the first partition lexicographically
2087:     PetscArrayzero(part, n);
2088:     PetscArrayzero(counts, n);
2089:     counts[0] = n;

2091:     // for each partition
2092:     for (PetscInt s = 0, node_offset = 0; s < p; s++) {
2093:       PetscInt num_compact_coords = part[n - 1] + 1;

2095:       const PetscReal *compact_nodes   = all_compact_nodes;
2096:       const PetscReal *compact_weights = all_compact_weights;
2097:       all_compact_nodes += num_compact_coords * nodes_per_type[s];
2098:       all_compact_weights += nodes_per_type[s];

2100:       // for every permutation of the vertices
2101:       for (PetscInt f = 0; f < fact; f++) {
2102:         PetscDTEnumPerm(n, f, perm, NULL);

2104:         // check if it is a valid permutation
2105:         PetscInt digit;
2106:         for (digit = 1; digit < n; digit++) {
2107:           // skip permutations that would duplicate a node because it has a smaller symmetry group
2108:           if (part[digit - 1] == part[digit] && perm[digit - 1] > perm[digit]) break;
2109:         }
2110:         if (digit < n) continue;

2112:         // create full nodes from this permutation of the compact nodes
2113:         PetscReal *full_nodes   = &points[node_offset * dim];
2114:         PetscReal *full_weights = &weights[node_offset];

2116:         PetscArraycpy(full_weights, compact_weights, nodes_per_type[s]);
2117:         for (PetscInt b = 0; b < n; b++) {
2118:           for (PetscInt d = 0; d < dim; d++) {
2119:             for (PetscInt node = 0; node < nodes_per_type[s]; node++) full_nodes[node * dim + d] += bary_to_biunit[d * n + perm[b]] * compact_nodes[node * num_compact_coords + part[b]];
2120:           }
2121:         }
2122:         node_offset += nodes_per_type[s];
2123:       }

2125:       if (s < p - 1) { // Generate the next partition
2126:         /* A partition is described by the number of coordinates that are in
2127:          * each set of duplicates (counts) and redundantly by mapping each
2128:          * index to its set of duplicates (part)
2129:          *
2130:          * Counts should always be in nonincreasing order
2131:          *
2132:          * We want to generate the partitions lexically by part, which means
2133:          * finding the last index where count > 1 and reducing by 1.
2134:          *
2135:          * For the new counts beyond that index, we eagerly assign the remaining
2136:          * capacity of the partition to smaller indices (ensures lexical ordering),
2137:          * while respecting the nonincreasing invariant of the counts
2138:          */
2139:         PetscInt last_digit            = part[n - 1];
2140:         PetscInt last_digit_with_extra = last_digit;
2141:         while (counts[last_digit_with_extra] == 1) last_digit_with_extra--;
2142:         PetscInt limit               = --counts[last_digit_with_extra];
2143:         PetscInt total_to_distribute = last_digit - last_digit_with_extra + 1;
2144:         for (PetscInt digit = last_digit_with_extra + 1; digit < n; digit++) {
2145:           counts[digit] = PetscMin(limit, total_to_distribute);
2146:           total_to_distribute -= PetscMin(limit, total_to_distribute);
2147:         }
2148:         for (PetscInt digit = 0, offset = 0; digit < n; digit++) {
2149:           PetscInt count = counts[digit];
2150:           for (PetscInt c = 0; c < count; c++) part[offset++] = digit;
2151:         }
2152:       }
2153:     }
2154:     PetscFree3(part, perm, counts);
2155:     PetscFree(bary_to_biunit);
2156:     PetscQuadratureCreate(PETSC_COMM_SELF, &q);
2157:     PetscQuadratureSetData(q, dim, 1, num_full_nodes, points, weights);
2158:     *quad = q;
2159:   }
2160:   return 0;
2161: }

2163: /*@
2164:   PetscDTTanhSinhTensorQuadrature - create tanh-sinh quadrature for a tensor product cell

2166:   Not Collective

2168:   Input Parameters:
2169: + dim   - The cell dimension
2170: . level - The number of points in one dimension, 2^l
2171: . a     - left end of interval (often-1)
2172: - b     - right end of interval (often +1)

2174:   Output Parameter:
2175: . q - A `PetscQuadrature` object

2177:   Level: intermediate

2179: .seealso: `PetscDTGaussTensorQuadrature()`, `PetscQuadrature`
2180: @*/
2181: PetscErrorCode PetscDTTanhSinhTensorQuadrature(PetscInt dim, PetscInt level, PetscReal a, PetscReal b, PetscQuadrature *q)
2182: {
2183:   const PetscInt  p     = 16;                        /* Digits of precision in the evaluation */
2184:   const PetscReal alpha = (b - a) / 2.;              /* Half-width of the integration interval */
2185:   const PetscReal beta  = (b + a) / 2.;              /* Center of the integration interval */
2186:   const PetscReal h     = PetscPowReal(2.0, -level); /* Step size, length between x_k */
2187:   PetscReal       xk;                                /* Quadrature point x_k on reference domain [-1, 1] */
2188:   PetscReal       wk = 0.5 * PETSC_PI;               /* Quadrature weight at x_k */
2189:   PetscReal      *x, *w;
2190:   PetscInt        K, k, npoints;

2194:   /* Find K such that the weights are < 32 digits of precision */
2195:   for (K = 1; PetscAbsReal(PetscLog10Real(wk)) < 2 * p; ++K) wk = 0.5 * h * PETSC_PI * PetscCoshReal(K * h) / PetscSqr(PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(K * h)));
2196:   PetscQuadratureCreate(PETSC_COMM_SELF, q);
2197:   PetscQuadratureSetOrder(*q, 2 * K + 1);
2198:   npoints = 2 * K - 1;
2199:   PetscMalloc1(npoints * dim, &x);
2200:   PetscMalloc1(npoints, &w);
2201:   /* Center term */
2202:   x[0] = beta;
2203:   w[0] = 0.5 * alpha * PETSC_PI;
2204:   for (k = 1; k < K; ++k) {
2205:     wk           = 0.5 * alpha * h * PETSC_PI * PetscCoshReal(k * h) / PetscSqr(PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(k * h)));
2206:     xk           = PetscTanhReal(0.5 * PETSC_PI * PetscSinhReal(k * h));
2207:     x[2 * k - 1] = -alpha * xk + beta;
2208:     w[2 * k - 1] = wk;
2209:     x[2 * k + 0] = alpha * xk + beta;
2210:     w[2 * k + 0] = wk;
2211:   }
2212:   PetscQuadratureSetData(*q, dim, 1, npoints, x, w);
2213:   return 0;
2214: }

2216: PetscErrorCode PetscDTTanhSinhIntegrate(void (*func)(const PetscReal[], void *, PetscReal *), PetscReal a, PetscReal b, PetscInt digits, void *ctx, PetscReal *sol)
2217: {
2218:   const PetscInt  p     = 16;           /* Digits of precision in the evaluation */
2219:   const PetscReal alpha = (b - a) / 2.; /* Half-width of the integration interval */
2220:   const PetscReal beta  = (b + a) / 2.; /* Center of the integration interval */
2221:   PetscReal       h     = 1.0;          /* Step size, length between x_k */
2222:   PetscInt        l     = 0;            /* Level of refinement, h = 2^{-l} */
2223:   PetscReal       osum  = 0.0;          /* Integral on last level */
2224:   PetscReal       psum  = 0.0;          /* Integral on the level before the last level */
2225:   PetscReal       sum;                  /* Integral on current level */
2226:   PetscReal       yk;                   /* Quadrature point 1 - x_k on reference domain [-1, 1] */
2227:   PetscReal       lx, rx;               /* Quadrature points to the left and right of 0 on the real domain [a, b] */
2228:   PetscReal       wk;                   /* Quadrature weight at x_k */
2229:   PetscReal       lval, rval;           /* Terms in the quadature sum to the left and right of 0 */
2230:   PetscInt        d;                    /* Digits of precision in the integral */

2233:   /* Center term */
2234:   func(&beta, ctx, &lval);
2235:   sum = 0.5 * alpha * PETSC_PI * lval;
2236:   /* */
2237:   do {
2238:     PetscReal lterm, rterm, maxTerm = 0.0, d1, d2, d3, d4;
2239:     PetscInt  k = 1;

2241:     ++l;
2242:     /* PetscPrintf(PETSC_COMM_SELF, "LEVEL %" PetscInt_FMT " sum: %15.15f\n", l, sum); */
2243:     /* At each level of refinement, h --> h/2 and sum --> sum/2 */
2244:     psum = osum;
2245:     osum = sum;
2246:     h *= 0.5;
2247:     sum *= 0.5;
2248:     do {
2249:       wk = 0.5 * h * PETSC_PI * PetscCoshReal(k * h) / PetscSqr(PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(k * h)));
2250:       yk = 1.0 / (PetscExpReal(0.5 * PETSC_PI * PetscSinhReal(k * h)) * PetscCoshReal(0.5 * PETSC_PI * PetscSinhReal(k * h)));
2251:       lx = -alpha * (1.0 - yk) + beta;
2252:       rx = alpha * (1.0 - yk) + beta;
2253:       func(&lx, ctx, &lval);
2254:       func(&rx, ctx, &rval);
2255:       lterm   = alpha * wk * lval;
2256:       maxTerm = PetscMax(PetscAbsReal(lterm), maxTerm);
2257:       sum += lterm;
2258:       rterm   = alpha * wk * rval;
2259:       maxTerm = PetscMax(PetscAbsReal(rterm), maxTerm);
2260:       sum += rterm;
2261:       ++k;
2262:       /* Only need to evaluate every other point on refined levels */
2263:       if (l != 1) ++k;
2264:     } while (PetscAbsReal(PetscLog10Real(wk)) < p); /* Only need to evaluate sum until weights are < 32 digits of precision */

2266:     d1 = PetscLog10Real(PetscAbsReal(sum - osum));
2267:     d2 = PetscLog10Real(PetscAbsReal(sum - psum));
2268:     d3 = PetscLog10Real(maxTerm) - p;
2269:     if (PetscMax(PetscAbsReal(lterm), PetscAbsReal(rterm)) == 0.0) d4 = 0.0;
2270:     else d4 = PetscLog10Real(PetscMax(PetscAbsReal(lterm), PetscAbsReal(rterm)));
2271:     d = PetscAbsInt(PetscMin(0, PetscMax(PetscMax(PetscMax(PetscSqr(d1) / d2, 2 * d1), d3), d4)));
2272:   } while (d < digits && l < 12);
2273:   *sol = sum;

2275:   return 0;
2276: }

2278: #if defined(PETSC_HAVE_MPFR)
2279: PetscErrorCode PetscDTTanhSinhIntegrateMPFR(void (*func)(const PetscReal[], void *, PetscReal *), PetscReal a, PetscReal b, PetscInt digits, void *ctx, PetscReal *sol)
2280: {
2281:   const PetscInt safetyFactor = 2; /* Calculate abcissa until 2*p digits */
2282:   PetscInt       l            = 0; /* Level of refinement, h = 2^{-l} */
2283:   mpfr_t         alpha;            /* Half-width of the integration interval */
2284:   mpfr_t         beta;             /* Center of the integration interval */
2285:   mpfr_t         h;                /* Step size, length between x_k */
2286:   mpfr_t         osum;             /* Integral on last level */
2287:   mpfr_t         psum;             /* Integral on the level before the last level */
2288:   mpfr_t         sum;              /* Integral on current level */
2289:   mpfr_t         yk;               /* Quadrature point 1 - x_k on reference domain [-1, 1] */
2290:   mpfr_t         lx, rx;           /* Quadrature points to the left and right of 0 on the real domain [a, b] */
2291:   mpfr_t         wk;               /* Quadrature weight at x_k */
2292:   PetscReal      lval, rval, rtmp; /* Terms in the quadature sum to the left and right of 0 */
2293:   PetscInt       d;                /* Digits of precision in the integral */
2294:   mpfr_t         pi2, kh, msinh, mcosh, maxTerm, curTerm, tmp;

2297:   /* Create high precision storage */
2298:   mpfr_inits2(PetscCeilReal(safetyFactor * digits * PetscLogReal(10.) / PetscLogReal(2.)), alpha, beta, h, sum, osum, psum, yk, wk, lx, rx, tmp, maxTerm, curTerm, pi2, kh, msinh, mcosh, NULL);
2299:   /* Initialization */
2300:   mpfr_set_d(alpha, 0.5 * (b - a), MPFR_RNDN);
2301:   mpfr_set_d(beta, 0.5 * (b + a), MPFR_RNDN);
2302:   mpfr_set_d(osum, 0.0, MPFR_RNDN);
2303:   mpfr_set_d(psum, 0.0, MPFR_RNDN);
2304:   mpfr_set_d(h, 1.0, MPFR_RNDN);
2305:   mpfr_const_pi(pi2, MPFR_RNDN);
2306:   mpfr_mul_d(pi2, pi2, 0.5, MPFR_RNDN);
2307:   /* Center term */
2308:   rtmp = 0.5 * (b + a);
2309:   func(&rtmp, ctx, &lval);
2310:   mpfr_set(sum, pi2, MPFR_RNDN);
2311:   mpfr_mul(sum, sum, alpha, MPFR_RNDN);
2312:   mpfr_mul_d(sum, sum, lval, MPFR_RNDN);
2313:   /* */
2314:   do {
2315:     PetscReal d1, d2, d3, d4;
2316:     PetscInt  k = 1;

2318:     ++l;
2319:     mpfr_set_d(maxTerm, 0.0, MPFR_RNDN);
2320:     /* PetscPrintf(PETSC_COMM_SELF, "LEVEL %" PetscInt_FMT " sum: %15.15f\n", l, sum); */
2321:     /* At each level of refinement, h --> h/2 and sum --> sum/2 */
2322:     mpfr_set(psum, osum, MPFR_RNDN);
2323:     mpfr_set(osum, sum, MPFR_RNDN);
2324:     mpfr_mul_d(h, h, 0.5, MPFR_RNDN);
2325:     mpfr_mul_d(sum, sum, 0.5, MPFR_RNDN);
2326:     do {
2327:       mpfr_set_si(kh, k, MPFR_RNDN);
2328:       mpfr_mul(kh, kh, h, MPFR_RNDN);
2329:       /* Weight */
2330:       mpfr_set(wk, h, MPFR_RNDN);
2331:       mpfr_sinh_cosh(msinh, mcosh, kh, MPFR_RNDN);
2332:       mpfr_mul(msinh, msinh, pi2, MPFR_RNDN);
2333:       mpfr_mul(mcosh, mcosh, pi2, MPFR_RNDN);
2334:       mpfr_cosh(tmp, msinh, MPFR_RNDN);
2335:       mpfr_sqr(tmp, tmp, MPFR_RNDN);
2336:       mpfr_mul(wk, wk, mcosh, MPFR_RNDN);
2337:       mpfr_div(wk, wk, tmp, MPFR_RNDN);
2338:       /* Abscissa */
2339:       mpfr_set_d(yk, 1.0, MPFR_RNDZ);
2340:       mpfr_cosh(tmp, msinh, MPFR_RNDN);
2341:       mpfr_div(yk, yk, tmp, MPFR_RNDZ);
2342:       mpfr_exp(tmp, msinh, MPFR_RNDN);
2343:       mpfr_div(yk, yk, tmp, MPFR_RNDZ);
2344:       /* Quadrature points */
2345:       mpfr_sub_d(lx, yk, 1.0, MPFR_RNDZ);
2346:       mpfr_mul(lx, lx, alpha, MPFR_RNDU);
2347:       mpfr_add(lx, lx, beta, MPFR_RNDU);
2348:       mpfr_d_sub(rx, 1.0, yk, MPFR_RNDZ);
2349:       mpfr_mul(rx, rx, alpha, MPFR_RNDD);
2350:       mpfr_add(rx, rx, beta, MPFR_RNDD);
2351:       /* Evaluation */
2352:       rtmp = mpfr_get_d(lx, MPFR_RNDU);
2353:       func(&rtmp, ctx, &lval);
2354:       rtmp = mpfr_get_d(rx, MPFR_RNDD);
2355:       func(&rtmp, ctx, &rval);
2356:       /* Update */
2357:       mpfr_mul(tmp, wk, alpha, MPFR_RNDN);
2358:       mpfr_mul_d(tmp, tmp, lval, MPFR_RNDN);
2359:       mpfr_add(sum, sum, tmp, MPFR_RNDN);
2360:       mpfr_abs(tmp, tmp, MPFR_RNDN);
2361:       mpfr_max(maxTerm, maxTerm, tmp, MPFR_RNDN);
2362:       mpfr_set(curTerm, tmp, MPFR_RNDN);
2363:       mpfr_mul(tmp, wk, alpha, MPFR_RNDN);
2364:       mpfr_mul_d(tmp, tmp, rval, MPFR_RNDN);
2365:       mpfr_add(sum, sum, tmp, MPFR_RNDN);
2366:       mpfr_abs(tmp, tmp, MPFR_RNDN);
2367:       mpfr_max(maxTerm, maxTerm, tmp, MPFR_RNDN);
2368:       mpfr_max(curTerm, curTerm, tmp, MPFR_RNDN);
2369:       ++k;
2370:       /* Only need to evaluate every other point on refined levels */
2371:       if (l != 1) ++k;
2372:       mpfr_log10(tmp, wk, MPFR_RNDN);
2373:       mpfr_abs(tmp, tmp, MPFR_RNDN);
2374:     } while (mpfr_get_d(tmp, MPFR_RNDN) < safetyFactor * digits); /* Only need to evaluate sum until weights are < 32 digits of precision */
2375:     mpfr_sub(tmp, sum, osum, MPFR_RNDN);
2376:     mpfr_abs(tmp, tmp, MPFR_RNDN);
2377:     mpfr_log10(tmp, tmp, MPFR_RNDN);
2378:     d1 = mpfr_get_d(tmp, MPFR_RNDN);
2379:     mpfr_sub(tmp, sum, psum, MPFR_RNDN);
2380:     mpfr_abs(tmp, tmp, MPFR_RNDN);
2381:     mpfr_log10(tmp, tmp, MPFR_RNDN);
2382:     d2 = mpfr_get_d(tmp, MPFR_RNDN);
2383:     mpfr_log10(tmp, maxTerm, MPFR_RNDN);
2384:     d3 = mpfr_get_d(tmp, MPFR_RNDN) - digits;
2385:     mpfr_log10(tmp, curTerm, MPFR_RNDN);
2386:     d4 = mpfr_get_d(tmp, MPFR_RNDN);
2387:     d  = PetscAbsInt(PetscMin(0, PetscMax(PetscMax(PetscMax(PetscSqr(d1) / d2, 2 * d1), d3), d4)));
2388:   } while (d < digits && l < 8);
2389:   *sol = mpfr_get_d(sum, MPFR_RNDN);
2390:   /* Cleanup */
2391:   mpfr_clears(alpha, beta, h, sum, osum, psum, yk, wk, lx, rx, tmp, maxTerm, curTerm, pi2, kh, msinh, mcosh, NULL);
2392:   return 0;
2393: }
2394: #else

2396: PetscErrorCode PetscDTTanhSinhIntegrateMPFR(void (*func)(const PetscReal[], void *, PetscReal *), PetscReal a, PetscReal b, PetscInt digits, void *ctx, PetscReal *sol)
2397: {
2398:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "This method will not work without MPFR. Reconfigure using --download-mpfr --download-gmp");
2399: }
2400: #endif

2402: /*@
2403:   PetscDTTensorQuadratureCreate - create the tensor product quadrature from two lower-dimensional quadratures

2405:   Not Collective

2407:   Input Parameters:
2408: + q1 - The first quadrature
2409: - q2 - The second quadrature

2411:   Output Parameter:
2412: . q - A `PetscQuadrature` object

2414:   Level: intermediate

2416: .seealso: `PetscQuadrature`, `PetscDTGaussTensorQuadrature()`
2417: @*/
2418: PetscErrorCode PetscDTTensorQuadratureCreate(PetscQuadrature q1, PetscQuadrature q2, PetscQuadrature *q)
2419: {
2420:   const PetscReal *x1, *w1, *x2, *w2;
2421:   PetscReal       *x, *w;
2422:   PetscInt         dim1, Nc1, Np1, order1, qa, d1;
2423:   PetscInt         dim2, Nc2, Np2, order2, qb, d2;
2424:   PetscInt         dim, Nc, Np, order, qc, d;

2429:   PetscQuadratureGetOrder(q1, &order1);
2430:   PetscQuadratureGetOrder(q2, &order2);
2432:   PetscQuadratureGetData(q1, &dim1, &Nc1, &Np1, &x1, &w1);
2433:   PetscQuadratureGetData(q2, &dim2, &Nc2, &Np2, &x2, &w2);

2436:   dim   = dim1 + dim2;
2437:   Nc    = Nc1;
2438:   Np    = Np1 * Np2;
2439:   order = order1;
2440:   PetscQuadratureCreate(PETSC_COMM_SELF, q);
2441:   PetscQuadratureSetOrder(*q, order);
2442:   PetscMalloc1(Np * dim, &x);
2443:   PetscMalloc1(Np, &w);
2444:   for (qa = 0, qc = 0; qa < Np1; ++qa) {
2445:     for (qb = 0; qb < Np2; ++qb, ++qc) {
2446:       for (d1 = 0, d = 0; d1 < dim1; ++d1, ++d) x[qc * dim + d] = x1[qa * dim1 + d1];
2447:       for (d2 = 0; d2 < dim2; ++d2, ++d) x[qc * dim + d] = x2[qb * dim2 + d2];
2448:       w[qc] = w1[qa] * w2[qb];
2449:     }
2450:   }
2451:   PetscQuadratureSetData(*q, dim, Nc, Np, x, w);
2452:   return 0;
2453: }

2455: /* Overwrites A. Can only handle full-rank problems with m>=n
2456:    A in column-major format
2457:    Ainv in row-major format
2458:    tau has length m
2459:    worksize must be >= max(1,n)
2460:  */
2461: static PetscErrorCode PetscDTPseudoInverseQR(PetscInt m, PetscInt mstride, PetscInt n, PetscReal *A_in, PetscReal *Ainv_out, PetscScalar *tau, PetscInt worksize, PetscScalar *work)
2462: {
2463:   PetscBLASInt M, N, K, lda, ldb, ldwork, info;
2464:   PetscScalar *A, *Ainv, *R, *Q, Alpha;

2466: #if defined(PETSC_USE_COMPLEX)
2467:   {
2468:     PetscInt i, j;
2469:     PetscMalloc2(m * n, &A, m * n, &Ainv);
2470:     for (j = 0; j < n; j++) {
2471:       for (i = 0; i < m; i++) A[i + m * j] = A_in[i + mstride * j];
2472:     }
2473:     mstride = m;
2474:   }
2475: #else
2476:   A = A_in;
2477:   Ainv = Ainv_out;
2478: #endif

2480:   PetscBLASIntCast(m, &M);
2481:   PetscBLASIntCast(n, &N);
2482:   PetscBLASIntCast(mstride, &lda);
2483:   PetscBLASIntCast(worksize, &ldwork);
2484:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2485:   PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&M, &N, A, &lda, tau, work, &ldwork, &info));
2486:   PetscFPTrapPop();
2488:   R = A; /* Upper triangular part of A now contains R, the rest contains the elementary reflectors */

2490:   /* Extract an explicit representation of Q */
2491:   Q = Ainv;
2492:   PetscArraycpy(Q, A, mstride * n);
2493:   K = N; /* full rank */
2494:   PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&M, &N, &K, Q, &lda, tau, work, &ldwork, &info));

2497:   /* Compute A^{-T} = (R^{-1} Q^T)^T = Q R^{-T} */
2498:   Alpha = 1.0;
2499:   ldb   = lda;
2500:   PetscCallBLAS("BLAStrsm", BLAStrsm_("Right", "Upper", "ConjugateTranspose", "NotUnitTriangular", &M, &N, &Alpha, R, &lda, Q, &ldb));
2501:   /* Ainv is Q, overwritten with inverse */

2503: #if defined(PETSC_USE_COMPLEX)
2504:   {
2505:     PetscInt i;
2506:     for (i = 0; i < m * n; i++) Ainv_out[i] = PetscRealPart(Ainv[i]);
2507:     PetscFree2(A, Ainv);
2508:   }
2509: #endif
2510:   return 0;
2511: }

2513: /* Computes integral of L_p' over intervals {(x0,x1),(x1,x2),...} */
2514: static PetscErrorCode PetscDTLegendreIntegrate(PetscInt ninterval, const PetscReal *x, PetscInt ndegree, const PetscInt *degrees, PetscBool Transpose, PetscReal *B)
2515: {
2516:   PetscReal *Bv;
2517:   PetscInt   i, j;

2519:   PetscMalloc1((ninterval + 1) * ndegree, &Bv);
2520:   /* Point evaluation of L_p on all the source vertices */
2521:   PetscDTLegendreEval(ninterval + 1, x, ndegree, degrees, Bv, NULL, NULL);
2522:   /* Integral over each interval: \int_a^b L_p' = L_p(b)-L_p(a) */
2523:   for (i = 0; i < ninterval; i++) {
2524:     for (j = 0; j < ndegree; j++) {
2525:       if (Transpose) B[i + ninterval * j] = Bv[(i + 1) * ndegree + j] - Bv[i * ndegree + j];
2526:       else B[i * ndegree + j] = Bv[(i + 1) * ndegree + j] - Bv[i * ndegree + j];
2527:     }
2528:   }
2529:   PetscFree(Bv);
2530:   return 0;
2531: }

2533: /*@
2534:    PetscDTReconstructPoly - create matrix representing polynomial reconstruction using cell intervals and evaluation at target intervals

2536:    Not Collective

2538:    Input Parameters:
2539: +  degree - degree of reconstruction polynomial
2540: .  nsource - number of source intervals
2541: .  sourcex - sorted coordinates of source cell boundaries (length nsource+1)
2542: .  ntarget - number of target intervals
2543: -  targetx - sorted coordinates of target cell boundaries (length ntarget+1)

2545:    Output Parameter:
2546: .  R - reconstruction matrix, utarget = sum_s R[t*nsource+s] * usource[s]

2548:    Level: advanced

2550: .seealso: `PetscDTLegendreEval()`
2551: @*/
2552: PetscErrorCode PetscDTReconstructPoly(PetscInt degree, PetscInt nsource, const PetscReal *sourcex, PetscInt ntarget, const PetscReal *targetx, PetscReal *R)
2553: {
2554:   PetscInt     i, j, k, *bdegrees, worksize;
2555:   PetscReal    xmin, xmax, center, hscale, *sourcey, *targety, *Bsource, *Bsinv, *Btarget;
2556:   PetscScalar *tau, *work;

2562:   if (PetscDefined(USE_DEBUG)) {
2565:   }
2566:   xmin     = PetscMin(sourcex[0], targetx[0]);
2567:   xmax     = PetscMax(sourcex[nsource], targetx[ntarget]);
2568:   center   = (xmin + xmax) / 2;
2569:   hscale   = (xmax - xmin) / 2;
2570:   worksize = nsource;
2571:   PetscMalloc4(degree + 1, &bdegrees, nsource + 1, &sourcey, nsource * (degree + 1), &Bsource, worksize, &work);
2572:   PetscMalloc4(nsource, &tau, nsource * (degree + 1), &Bsinv, ntarget + 1, &targety, ntarget * (degree + 1), &Btarget);
2573:   for (i = 0; i <= nsource; i++) sourcey[i] = (sourcex[i] - center) / hscale;
2574:   for (i = 0; i <= degree; i++) bdegrees[i] = i + 1;
2575:   PetscDTLegendreIntegrate(nsource, sourcey, degree + 1, bdegrees, PETSC_TRUE, Bsource);
2576:   PetscDTPseudoInverseQR(nsource, nsource, degree + 1, Bsource, Bsinv, tau, nsource, work);
2577:   for (i = 0; i <= ntarget; i++) targety[i] = (targetx[i] - center) / hscale;
2578:   PetscDTLegendreIntegrate(ntarget, targety, degree + 1, bdegrees, PETSC_FALSE, Btarget);
2579:   for (i = 0; i < ntarget; i++) {
2580:     PetscReal rowsum = 0;
2581:     for (j = 0; j < nsource; j++) {
2582:       PetscReal sum = 0;
2583:       for (k = 0; k < degree + 1; k++) sum += Btarget[i * (degree + 1) + k] * Bsinv[k * nsource + j];
2584:       R[i * nsource + j] = sum;
2585:       rowsum += sum;
2586:     }
2587:     for (j = 0; j < nsource; j++) R[i * nsource + j] /= rowsum; /* normalize each row */
2588:   }
2589:   PetscFree4(bdegrees, sourcey, Bsource, work);
2590:   PetscFree4(tau, Bsinv, targety, Btarget);
2591:   return 0;
2592: }

2594: /*@C
2595:    PetscGaussLobattoLegendreIntegrate - Compute the L2 integral of a function on the GLL points

2597:    Not Collective

2599:    Input Parameters:
2600: +  n - the number of GLL nodes
2601: .  nodes - the GLL nodes
2602: .  weights - the GLL weights
2603: -  f - the function values at the nodes

2605:    Output Parameter:
2606: .  in - the value of the integral

2608:    Level: beginner

2610: .seealso: `PetscDTGaussLobattoLegendreQuadrature()`
2611: @*/
2612: PetscErrorCode PetscGaussLobattoLegendreIntegrate(PetscInt n, PetscReal *nodes, PetscReal *weights, const PetscReal *f, PetscReal *in)
2613: {
2614:   PetscInt i;

2616:   *in = 0.;
2617:   for (i = 0; i < n; i++) *in += f[i] * f[i] * weights[i];
2618:   return 0;
2619: }

2621: /*@C
2622:    PetscGaussLobattoLegendreElementLaplacianCreate - computes the Laplacian for a single 1d GLL element

2624:    Not Collective

2626:    Input Parameters:
2627: +  n - the number of GLL nodes
2628: .  nodes - the GLL nodes
2629: -  weights - the GLL weights

2631:    Output Parameter:
2632: .  A - the stiffness element

2634:    Level: beginner

2636:    Notes:
2637:    Destroy this with `PetscGaussLobattoLegendreElementLaplacianDestroy()`

2639:    You can access entries in this array with AA[i][j] but in memory it is stored in contiguous memory, row oriented (the array is symmetric)

2641: .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianDestroy()`
2642: @*/
2643: PetscErrorCode PetscGaussLobattoLegendreElementLaplacianCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2644: {
2645:   PetscReal      **A;
2646:   const PetscReal *gllnodes = nodes;
2647:   const PetscInt   p        = n - 1;
2648:   PetscReal        z0, z1, z2 = -1, x, Lpj, Lpr;
2649:   PetscInt         i, j, nn, r;

2651:   PetscMalloc1(n, &A);
2652:   PetscMalloc1(n * n, &A[0]);
2653:   for (i = 1; i < n; i++) A[i] = A[i - 1] + n;

2655:   for (j = 1; j < p; j++) {
2656:     x  = gllnodes[j];
2657:     z0 = 1.;
2658:     z1 = x;
2659:     for (nn = 1; nn < p; nn++) {
2660:       z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2661:       z0 = z1;
2662:       z1 = z2;
2663:     }
2664:     Lpj = z2;
2665:     for (r = 1; r < p; r++) {
2666:       if (r == j) {
2667:         A[j][j] = 2. / (3. * (1. - gllnodes[j] * gllnodes[j]) * Lpj * Lpj);
2668:       } else {
2669:         x  = gllnodes[r];
2670:         z0 = 1.;
2671:         z1 = x;
2672:         for (nn = 1; nn < p; nn++) {
2673:           z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2674:           z0 = z1;
2675:           z1 = z2;
2676:         }
2677:         Lpr     = z2;
2678:         A[r][j] = 4. / (((PetscReal)p) * (((PetscReal)p) + 1.) * Lpj * Lpr * (gllnodes[j] - gllnodes[r]) * (gllnodes[j] - gllnodes[r]));
2679:       }
2680:     }
2681:   }
2682:   for (j = 1; j < p + 1; j++) {
2683:     x  = gllnodes[j];
2684:     z0 = 1.;
2685:     z1 = x;
2686:     for (nn = 1; nn < p; nn++) {
2687:       z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2688:       z0 = z1;
2689:       z1 = z2;
2690:     }
2691:     Lpj     = z2;
2692:     A[j][0] = 4. * PetscPowRealInt(-1., p) / (((PetscReal)p) * (((PetscReal)p) + 1.) * Lpj * (1. + gllnodes[j]) * (1. + gllnodes[j]));
2693:     A[0][j] = A[j][0];
2694:   }
2695:   for (j = 0; j < p; j++) {
2696:     x  = gllnodes[j];
2697:     z0 = 1.;
2698:     z1 = x;
2699:     for (nn = 1; nn < p; nn++) {
2700:       z2 = x * z1 * (2. * ((PetscReal)nn) + 1.) / (((PetscReal)nn) + 1.) - z0 * (((PetscReal)nn) / (((PetscReal)nn) + 1.));
2701:       z0 = z1;
2702:       z1 = z2;
2703:     }
2704:     Lpj = z2;

2706:     A[p][j] = 4. / (((PetscReal)p) * (((PetscReal)p) + 1.) * Lpj * (1. - gllnodes[j]) * (1. - gllnodes[j]));
2707:     A[j][p] = A[p][j];
2708:   }
2709:   A[0][0] = 0.5 + (((PetscReal)p) * (((PetscReal)p) + 1.) - 2.) / 6.;
2710:   A[p][p] = A[0][0];
2711:   *AA     = A;
2712:   return 0;
2713: }

2715: /*@C
2716:    PetscGaussLobattoLegendreElementLaplacianDestroy - frees the Laplacian for a single 1d GLL element created with `PetscGaussLobattoLegendreElementLaplacianCreate()`

2718:    Not Collective

2720:    Input Parameters:
2721: +  n - the number of GLL nodes
2722: .  nodes - the GLL nodes
2723: .  weights - the GLL weightss
2724: -  A - the stiffness element

2726:    Level: beginner

2728: .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianCreate()`
2729: @*/
2730: PetscErrorCode PetscGaussLobattoLegendreElementLaplacianDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2731: {
2732:   PetscFree((*AA)[0]);
2733:   PetscFree(*AA);
2734:   *AA = NULL;
2735:   return 0;
2736: }

2738: /*@C
2739:    PetscGaussLobattoLegendreElementGradientCreate - computes the gradient for a single 1d GLL element

2741:    Not Collective

2743:    Input Parameter:
2744: +  n - the number of GLL nodes
2745: .  nodes - the GLL nodes
2746: .  weights - the GLL weights

2748:    Output Parameters:
2749: .  AA - the stiffness element
2750: -  AAT - the transpose of AA (pass in NULL if you do not need this array)

2752:    Level: beginner

2754:    Notes:
2755:    Destroy this with `PetscGaussLobattoLegendreElementGradientDestroy()`

2757:    You can access entries in these arrays with AA[i][j] but in memory it is stored in contiguous memory, row oriented

2759: .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianDestroy()`, `PetscGaussLobattoLegendreElementGradientDestroy()`
2760: @*/
2761: PetscErrorCode PetscGaussLobattoLegendreElementGradientCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA, PetscReal ***AAT)
2762: {
2763:   PetscReal      **A, **AT = NULL;
2764:   const PetscReal *gllnodes = nodes;
2765:   const PetscInt   p        = n - 1;
2766:   PetscReal        Li, Lj, d0;
2767:   PetscInt         i, j;

2769:   PetscMalloc1(n, &A);
2770:   PetscMalloc1(n * n, &A[0]);
2771:   for (i = 1; i < n; i++) A[i] = A[i - 1] + n;

2773:   if (AAT) {
2774:     PetscMalloc1(n, &AT);
2775:     PetscMalloc1(n * n, &AT[0]);
2776:     for (i = 1; i < n; i++) AT[i] = AT[i - 1] + n;
2777:   }

2779:   if (n == 1) A[0][0] = 0.;
2780:   d0 = (PetscReal)p * ((PetscReal)p + 1.) / 4.;
2781:   for (i = 0; i < n; i++) {
2782:     for (j = 0; j < n; j++) {
2783:       A[i][j] = 0.;
2784:       PetscDTComputeJacobi(0., 0., p, gllnodes[i], &Li);
2785:       PetscDTComputeJacobi(0., 0., p, gllnodes[j], &Lj);
2786:       if (i != j) A[i][j] = Li / (Lj * (gllnodes[i] - gllnodes[j]));
2787:       if ((j == i) && (i == 0)) A[i][j] = -d0;
2788:       if (j == i && i == p) A[i][j] = d0;
2789:       if (AT) AT[j][i] = A[i][j];
2790:     }
2791:   }
2792:   if (AAT) *AAT = AT;
2793:   *AA = A;
2794:   return 0;
2795: }

2797: /*@C
2798:    PetscGaussLobattoLegendreElementGradientDestroy - frees the gradient for a single 1d GLL element obtained with `PetscGaussLobattoLegendreElementGradientCreate()`

2800:    Not Collective

2802:    Input Parameters:
2803: +  n - the number of GLL nodes
2804: .  nodes - the GLL nodes
2805: .  weights - the GLL weights
2806: .  AA - the stiffness element
2807: -  AAT - the transpose of the element

2809:    Level: beginner

2811: .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianCreate()`, `PetscGaussLobattoLegendreElementAdvectionCreate()`
2812: @*/
2813: PetscErrorCode PetscGaussLobattoLegendreElementGradientDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA, PetscReal ***AAT)
2814: {
2815:   PetscFree((*AA)[0]);
2816:   PetscFree(*AA);
2817:   *AA = NULL;
2818:   if (*AAT) {
2819:     PetscFree((*AAT)[0]);
2820:     PetscFree(*AAT);
2821:     *AAT = NULL;
2822:   }
2823:   return 0;
2824: }

2826: /*@C
2827:    PetscGaussLobattoLegendreElementAdvectionCreate - computes the advection operator for a single 1d GLL element

2829:    Not Collective

2831:    Input Parameters:
2832: +  n - the number of GLL nodes
2833: .  nodes - the GLL nodes
2834: -  weights - the GLL weightss

2836:    Output Parameter:
2837: .  AA - the stiffness element

2839:    Level: beginner

2841:    Notes:
2842:    Destroy this with `PetscGaussLobattoLegendreElementAdvectionDestroy()`

2844:    This is the same as the Gradient operator multiplied by the diagonal mass matrix

2846:    You can access entries in this array with AA[i][j] but in memory it is stored in contiguous memory, row oriented

2848: .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementLaplacianCreate()`, `PetscGaussLobattoLegendreElementAdvectionDestroy()`
2849: @*/
2850: PetscErrorCode PetscGaussLobattoLegendreElementAdvectionCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2851: {
2852:   PetscReal      **D;
2853:   const PetscReal *gllweights = weights;
2854:   const PetscInt   glln       = n;
2855:   PetscInt         i, j;

2857:   PetscGaussLobattoLegendreElementGradientCreate(n, nodes, weights, &D, NULL);
2858:   for (i = 0; i < glln; i++) {
2859:     for (j = 0; j < glln; j++) D[i][j] = gllweights[i] * D[i][j];
2860:   }
2861:   *AA = D;
2862:   return 0;
2863: }

2865: /*@C
2866:    PetscGaussLobattoLegendreElementAdvectionDestroy - frees the advection stiffness for a single 1d GLL element created with `PetscGaussLobattoLegendreElementAdvectionCreate()`

2868:    Not Collective

2870:    Input Parameters:
2871: +  n - the number of GLL nodes
2872: .  nodes - the GLL nodes
2873: .  weights - the GLL weights
2874: -  A - advection

2876:    Level: beginner

2878: .seealso: `PetscDTGaussLobattoLegendreQuadrature()`, `PetscGaussLobattoLegendreElementAdvectionCreate()`
2879: @*/
2880: PetscErrorCode PetscGaussLobattoLegendreElementAdvectionDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2881: {
2882:   PetscFree((*AA)[0]);
2883:   PetscFree(*AA);
2884:   *AA = NULL;
2885:   return 0;
2886: }

2888: PetscErrorCode PetscGaussLobattoLegendreElementMassCreate(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2889: {
2890:   PetscReal      **A;
2891:   const PetscReal *gllweights = weights;
2892:   const PetscInt   glln       = n;
2893:   PetscInt         i, j;

2895:   PetscMalloc1(glln, &A);
2896:   PetscMalloc1(glln * glln, &A[0]);
2897:   for (i = 1; i < glln; i++) A[i] = A[i - 1] + glln;
2898:   if (glln == 1) A[0][0] = 0.;
2899:   for (i = 0; i < glln; i++) {
2900:     for (j = 0; j < glln; j++) {
2901:       A[i][j] = 0.;
2902:       if (j == i) A[i][j] = gllweights[i];
2903:     }
2904:   }
2905:   *AA = A;
2906:   return 0;
2907: }

2909: PetscErrorCode PetscGaussLobattoLegendreElementMassDestroy(PetscInt n, PetscReal *nodes, PetscReal *weights, PetscReal ***AA)
2910: {
2911:   PetscFree((*AA)[0]);
2912:   PetscFree(*AA);
2913:   *AA = NULL;
2914:   return 0;
2915: }

2917: /*@
2918:   PetscDTIndexToBary - convert an index into a barycentric coordinate.

2920:   Input Parameters:
2921: + len - the desired length of the barycentric tuple (usually 1 more than the dimension it represents, so a barycentric coordinate in a triangle has length 3)
2922: . sum - the value that the sum of the barycentric coordinates (which will be non-negative integers) should sum to
2923: - index - the index to convert: should be >= 0 and < Binomial(len - 1 + sum, sum)

2925:   Output Parameter:
2926: . coord - will be filled with the barycentric coordinate

2928:   Level: beginner

2930:   Note:
2931:   The indices map to barycentric coordinates in lexicographic order, where the first index is the
2932:   least significant and the last index is the most significant.

2934: .seealso: `PetscDTBaryToIndex()`
2935: @*/
2936: PetscErrorCode PetscDTIndexToBary(PetscInt len, PetscInt sum, PetscInt index, PetscInt coord[])
2937: {
2938:   PetscInt c, d, s, total, subtotal, nexttotal;

2943:   if (!len) {
2944:     if (!sum && !index) return 0;
2945:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid index or sum for length 0 barycentric coordinate");
2946:   }
2947:   for (c = 1, total = 1; c <= len; c++) {
2948:     /* total is the number of ways to have a tuple of length c with sum */
2949:     if (index < total) break;
2950:     total = (total * (sum + c)) / c;
2951:   }
2953:   for (d = c; d < len; d++) coord[d] = 0;
2954:   for (s = 0, subtotal = 1, nexttotal = 1; c > 0;) {
2955:     /* subtotal is the number of ways to have a tuple of length c with sum s */
2956:     /* nexttotal is the number of ways to have a tuple of length c-1 with sum s */
2957:     if ((index + subtotal) >= total) {
2958:       coord[--c] = sum - s;
2959:       index -= (total - subtotal);
2960:       sum       = s;
2961:       total     = nexttotal;
2962:       subtotal  = 1;
2963:       nexttotal = 1;
2964:       s         = 0;
2965:     } else {
2966:       subtotal  = (subtotal * (c + s)) / (s + 1);
2967:       nexttotal = (nexttotal * (c - 1 + s)) / (s + 1);
2968:       s++;
2969:     }
2970:   }
2971:   return 0;
2972: }

2974: /*@
2975:   PetscDTBaryToIndex - convert a barycentric coordinate to an index

2977:   Input Parameters:
2978: + len - the desired length of the barycentric tuple (usually 1 more than the dimension it represents, so a barycentric coordinate in a triangle has length 3)
2979: . sum - the value that the sum of the barycentric coordinates (which will be non-negative integers) should sum to
2980: - coord - a barycentric coordinate with the given length and sum

2982:   Output Parameter:
2983: . index - the unique index for the coordinate, >= 0 and < Binomial(len - 1 + sum, sum)

2985:   Level: beginner

2987:   Note:
2988:   The indices map to barycentric coordinates in lexicographic order, where the first index is the
2989:   least significant and the last index is the most significant.

2991: .seealso: `PetscDTIndexToBary`
2992: @*/
2993: PetscErrorCode PetscDTBaryToIndex(PetscInt len, PetscInt sum, const PetscInt coord[], PetscInt *index)
2994: {
2995:   PetscInt c;
2996:   PetscInt i;
2997:   PetscInt total;

3001:   if (!len) {
3002:     if (!sum) {
3003:       *index = 0;
3004:       return 0;
3005:     }
3006:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid index or sum for length 0 barycentric coordinate");
3007:   }
3008:   for (c = 1, total = 1; c < len; c++) total = (total * (sum + c)) / c;
3009:   i = total - 1;
3010:   c = len - 1;
3011:   sum -= coord[c];
3012:   while (sum > 0) {
3013:     PetscInt subtotal;
3014:     PetscInt s;

3016:     for (s = 1, subtotal = 1; s < sum; s++) subtotal = (subtotal * (c + s)) / s;
3017:     i -= subtotal;
3018:     sum -= coord[--c];
3019:   }
3020:   *index = i;
3021:   return 0;
3022: }