Actual source code: ex59.c
1: static char help[] = "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
2: Discrete system: 1D, 2D or 3D laplacian, discretized with spectral elements.\n\
3: Spectral degree can be specified by passing values to -p option.\n\
4: Global problem with either Dirichlet boundary conditions on one side or pure Neumann boundary conditions (depending on runtime parameters).\n\
5: Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ number of elements in _ direction.\n\
6: Example usage: \n\
7: 1D: mpiexec -n 4 ex59 -nex 7\n\
8: 2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
9: 3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
10: Subdomain decomposition can be specified with -np_ parameters.\n\
11: Dirichlet boundaries on one side by default:\n\
12: by default it does not iterate on Dirichlet nodes; if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
13: The pure Neumann case can be requested by passing in -pureneumann.\n\
14: In the latter case, to avoid runtime errors during factorization, also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";
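/*
   A typical run combining the discretization options above with solver customization (the BDDC and
   FETI-DP solvers created below use the "physical_" and "fluxes_" options prefixes; the options shown
   here are taken from the test section at the bottom of this file):

     mpiexec -n 4 ./ex59 -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
*/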
16: #include <petscksp.h>
17: #include <petscpc.h>
18: #include <petscdm.h>
19: #include <petscdmda.h>
20: #include <petscblaslapack.h>
21: #define DEBUG 0
23: /* structure holding domain data */
24: typedef struct {
25: /* communicator */
26: MPI_Comm gcomm;
27: /* space dimension */
28: PetscInt dim;
29: /* spectral degree */
30: PetscInt p;
31: /* subdomains per dimension */
32: PetscInt npx, npy, npz;
33: /* subdomain index in Cartesian dimensions */
34: PetscInt ipx, ipy, ipz;
35: /* elements per dimension */
36: PetscInt nex, ney, nez;
37: /* local elements per dimension */
38: PetscInt nex_l, ney_l, nez_l;
39: /* global number of dofs per dimension */
40: PetscInt xm, ym, zm;
41: /* local number of dofs per dimension */
42: PetscInt xm_l, ym_l, zm_l;
43: /* starting global indexes for subdomain in lexicographic ordering */
44: PetscInt startx, starty, startz;
45: /* pure Neumann problem */
46: PetscBool pure_neumann;
47: /* Dirichlet BC implementation */
48: PetscBool DBC_zerorows;
49: /* Scaling factor for subdomain */
50: PetscScalar scalingfactor;
51: PetscBool testkspfetidp;
52: } DomainData;
54: /* structure holding GLL data */
55: typedef struct {
56: /* GLL nodes */
57: PetscReal *zGL;
58: /* GLL weights */
59: PetscScalar *rhoGL;
61: /* auxiliary matrix for the 1D Laplacian */
61: PetscScalar **A;
62: /* Element matrix */
63: Mat elem_mat;
64: } GLLData;
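/* Builds (in two passes: count, then fill) the CSR adjacency graph of the local dofs passed later to
   PCBDDCSetLocalAdjacencyGraph: dofs on the boundary of the local lexicographic grid list their
   box-stencil neighbours, while interior dofs list only themselves */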
66: static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
67: {
68: PetscInt *xadj_temp, *adjncy_temp;
69: PetscInt i, j, k, ii, jj, kk, iindex, count_adj;
70: PetscInt istart_csr, iend_csr, jstart_csr, jend_csr, kstart_csr, kend_csr;
71: PetscBool internal_node;
73: /* first count dimension of adjncy */
75: count_adj = 0;
76: for (k = 0; k < dd.zm_l; k++) {
77: internal_node = PETSC_TRUE;
78: kstart_csr = k > 0 ? k - 1 : k;
79: kend_csr = k < dd.zm_l - 1 ? k + 2 : k + 1;
80: if (k == 0 || k == dd.zm_l - 1) {
81: internal_node = PETSC_FALSE;
82: kstart_csr = k;
83: kend_csr = k + 1;
84: }
85: for (j = 0; j < dd.ym_l; j++) {
86: jstart_csr = j > 0 ? j - 1 : j;
87: jend_csr = j < dd.ym_l - 1 ? j + 2 : j + 1;
88: if (j == 0 || j == dd.ym_l - 1) {
89: internal_node = PETSC_FALSE;
90: jstart_csr = j;
91: jend_csr = j + 1;
92: }
93: for (i = 0; i < dd.xm_l; i++) {
94: istart_csr = i > 0 ? i - 1 : i;
95: iend_csr = i < dd.xm_l - 1 ? i + 2 : i + 1;
96: if (i == 0 || i == dd.xm_l - 1) {
97: internal_node = PETSC_FALSE;
98: istart_csr = i;
99: iend_csr = i + 1;
100: }
101: if (internal_node) {
102: istart_csr = i;
103: iend_csr = i + 1;
104: jstart_csr = j;
105: jend_csr = j + 1;
106: kstart_csr = k;
107: kend_csr = k + 1;
108: }
109: for (kk = kstart_csr; kk < kend_csr; kk++) {
110: for (jj = jstart_csr; jj < jend_csr; jj++) {
111: for (ii = istart_csr; ii < iend_csr; ii++) count_adj = count_adj + 1;
112: }
113: }
114: }
115: }
116: }
117: PetscMalloc1(dd.xm_l * dd.ym_l * dd.zm_l + 1, &xadj_temp);
118: PetscMalloc1(count_adj, &adjncy_temp);
120: /* now fill CSR data structure */
121: count_adj = 0;
122: for (k = 0; k < dd.zm_l; k++) {
123: internal_node = PETSC_TRUE;
124: kstart_csr = k > 0 ? k - 1 : k;
125: kend_csr = k < dd.zm_l - 1 ? k + 2 : k + 1;
126: if (k == 0 || k == dd.zm_l - 1) {
127: internal_node = PETSC_FALSE;
128: kstart_csr = k;
129: kend_csr = k + 1;
130: }
131: for (j = 0; j < dd.ym_l; j++) {
132: jstart_csr = j > 0 ? j - 1 : j;
133: jend_csr = j < dd.ym_l - 1 ? j + 2 : j + 1;
134: if (j == 0 || j == dd.ym_l - 1) {
135: internal_node = PETSC_FALSE;
136: jstart_csr = j;
137: jend_csr = j + 1;
138: }
139: for (i = 0; i < dd.xm_l; i++) {
140: istart_csr = i > 0 ? i - 1 : i;
141: iend_csr = i < dd.xm_l - 1 ? i + 2 : i + 1;
142: if (i == 0 || i == dd.xm_l - 1) {
143: internal_node = PETSC_FALSE;
144: istart_csr = i;
145: iend_csr = i + 1;
146: }
147: iindex = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
148: xadj_temp[iindex] = count_adj;
149: if (internal_node) {
150: istart_csr = i;
151: iend_csr = i + 1;
152: jstart_csr = j;
153: jend_csr = j + 1;
154: kstart_csr = k;
155: kend_csr = k + 1;
156: }
157: for (kk = kstart_csr; kk < kend_csr; kk++) {
158: for (jj = jstart_csr; jj < jend_csr; jj++) {
159: for (ii = istart_csr; ii < iend_csr; ii++) {
160: iindex = kk * dd.xm_l * dd.ym_l + jj * dd.xm_l + ii;
162: adjncy_temp[count_adj] = iindex;
163: count_adj = count_adj + 1;
164: }
165: }
166: }
167: }
168: }
169: }
170: xadj_temp[dd.xm_l * dd.ym_l * dd.zm_l] = count_adj;
172: *xadj = xadj_temp;
173: *adjncy = adjncy_temp;
174: return 0;
175: }
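/* Computes index sets of local dofs lying on the boundary of the global domain: "dirichlet" collects
   the west face (where Dirichlet conditions are imposed), "neumann" collects the remaining
   global-boundary faces owned by this subdomain that were not already marked as Dirichlet */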
177: static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd, IS *dirichlet, IS *neumann)
178: {
179: IS temp_dirichlet = 0, temp_neumann = 0;
180: PetscInt localsize, i, j, k, *indices;
181: PetscBool *touched;
184: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
186: PetscMalloc1(localsize, &indices);
187: PetscMalloc1(localsize, &touched);
188: for (i = 0; i < localsize; i++) touched[i] = PETSC_FALSE;
190: if (dirichlet) {
191: i = 0;
192: /* west boundary */
193: if (dd.ipx == 0) {
194: for (k = 0; k < dd.zm_l; k++) {
195: for (j = 0; j < dd.ym_l; j++) {
196: indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
197: touched[indices[i]] = PETSC_TRUE;
198: i++;
199: }
200: }
201: }
202: ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_dirichlet);
203: }
204: if (neumann) {
205: i = 0;
206: /* west boundary */
207: if (dd.ipx == 0) {
208: for (k = 0; k < dd.zm_l; k++) {
209: for (j = 0; j < dd.ym_l; j++) {
210: indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
211: if (!touched[indices[i]]) {
212: touched[indices[i]] = PETSC_TRUE;
213: i++;
214: }
215: }
216: }
217: }
218: /* east boundary */
219: if (dd.ipx == dd.npx - 1) {
220: for (k = 0; k < dd.zm_l; k++) {
221: for (j = 0; j < dd.ym_l; j++) {
222: indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l + dd.xm_l - 1;
223: if (!touched[indices[i]]) {
224: touched[indices[i]] = PETSC_TRUE;
225: i++;
226: }
227: }
228: }
229: }
230: /* south boundary */
231: if (dd.ipy == 0 && dd.dim > 1) {
232: for (k = 0; k < dd.zm_l; k++) {
233: for (j = 0; j < dd.xm_l; j++) {
234: indices[i] = k * dd.ym_l * dd.xm_l + j;
235: if (!touched[indices[i]]) {
236: touched[indices[i]] = PETSC_TRUE;
237: i++;
238: }
239: }
240: }
241: }
242: /* north boundary */
243: if (dd.ipy == dd.npy - 1 && dd.dim > 1) {
244: for (k = 0; k < dd.zm_l; k++) {
245: for (j = 0; j < dd.xm_l; j++) {
246: indices[i] = k * dd.ym_l * dd.xm_l + (dd.ym_l - 1) * dd.xm_l + j;
247: if (!touched[indices[i]]) {
248: touched[indices[i]] = PETSC_TRUE;
249: i++;
250: }
251: }
252: }
253: }
254: /* bottom boundary */
255: if (dd.ipz == 0 && dd.dim > 2) {
256: for (k = 0; k < dd.ym_l; k++) {
257: for (j = 0; j < dd.xm_l; j++) {
258: indices[i] = k * dd.xm_l + j;
259: if (!touched[indices[i]]) {
260: touched[indices[i]] = PETSC_TRUE;
261: i++;
262: }
263: }
264: }
265: }
266: /* top boundary */
267: if (dd.ipz == dd.npz - 1 && dd.dim > 2) {
268: for (k = 0; k < dd.ym_l; k++) {
269: for (j = 0; j < dd.xm_l; j++) {
270: indices[i] = (dd.zm_l - 1) * dd.ym_l * dd.xm_l + k * dd.xm_l + j;
271: if (!touched[indices[i]]) {
272: touched[indices[i]] = PETSC_TRUE;
273: i++;
274: }
275: }
276: }
277: }
278: ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_neumann);
279: }
280: if (dirichlet) *dirichlet = temp_dirichlet;
281: if (neumann) *neumann = temp_neumann;
282: PetscFree(indices);
283: PetscFree(touched);
284: return 0;
285: }
287: static PetscErrorCode ComputeMapping(DomainData dd, ISLocalToGlobalMapping *isg2lmap)
288: {
289: DM da;
290: AO ao;
291: DMBoundaryType bx = DM_BOUNDARY_NONE, by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
292: DMDAStencilType stype = DMDA_STENCIL_BOX;
293: ISLocalToGlobalMapping temp_isg2lmap;
294: PetscInt i, j, k, ig, jg, kg, lindex, gindex, localsize;
295: PetscInt *global_indices;
298: /* Not an efficient mapping: this function computes a very simple lexicographic mapping
299: just to illustrate the creation of a MATIS object */
300: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
301: PetscMalloc1(localsize, &global_indices);
302: for (k = 0; k < dd.zm_l; k++) {
303: kg = dd.startz + k;
304: for (j = 0; j < dd.ym_l; j++) {
305: jg = dd.starty + j;
306: for (i = 0; i < dd.xm_l; i++) {
307: ig = dd.startx + i;
308: lindex = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
309: gindex = kg * dd.xm * dd.ym + jg * dd.xm + ig;
310: global_indices[lindex] = gindex;
311: }
312: }
313: }
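/* The DMDA below is used only to obtain an AO that converts the natural (lexicographic) global
   numbering computed above into the PETSc parallel ordering used by the local-to-global mapping */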
314: if (dd.dim == 3) {
315: DMDACreate3d(dd.gcomm, bx, by, bz, stype, dd.xm, dd.ym, dd.zm, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, NULL, &da);
316: } else if (dd.dim == 2) {
317: DMDACreate2d(dd.gcomm, bx, by, stype, dd.xm, dd.ym, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da);
318: } else {
319: DMDACreate1d(dd.gcomm, bx, dd.xm, 1, 1, NULL, &da);
320: }
321: DMSetFromOptions(da);
322: DMSetUp(da);
323: DMDASetAOType(da, AOMEMORYSCALABLE);
324: DMDAGetAO(da, &ao);
325: AOApplicationToPetsc(ao, dd.xm_l * dd.ym_l * dd.zm_l, global_indices);
326: ISLocalToGlobalMappingCreate(dd.gcomm, 1, localsize, global_indices, PETSC_OWN_POINTER, &temp_isg2lmap);
327: DMDestroy(&da);
328: *isg2lmap = temp_isg2lmap;
329: return 0;
330: }
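/* Assembles the local (Neumann) subdomain stiffness matrix by summing the spectral element matrix
   over the local elements; when Dirichlet dofs are eliminated (i.e. without -usezerorows), subdomains
   touching the Dirichlet side use, for their west-most elements, a submatrix of the element matrix
   with the Dirichlet nodes removed */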
331: static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
332: {
333: PetscInt localsize, zloc, yloc, xloc, auxnex, auxney, auxnez;
334: PetscInt ie, je, ke, i, j, k, ig, jg, kg, ii, ming;
335: PetscInt *indexg, *cols, *colsg;
336: PetscScalar *vals;
337: Mat temp_local_mat, elem_mat_DBC = 0, *usedmat;
338: IS submatIS;
341: MatGetSize(glldata.elem_mat, &i, &j);
342: PetscMalloc1(i, &indexg);
343: PetscMalloc1(i, &colsg);
344: /* get submatrix of elem_mat without Dirichlet nodes */
345: if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
346: xloc = dd.p + 1;
347: yloc = 1;
348: zloc = 1;
349: if (dd.dim > 1) yloc = dd.p + 1;
350: if (dd.dim > 2) zloc = dd.p + 1;
351: ii = 0;
352: for (k = 0; k < zloc; k++) {
353: for (j = 0; j < yloc; j++) {
354: for (i = 1; i < xloc; i++) {
355: indexg[ii] = k * xloc * yloc + j * xloc + i;
356: ii++;
357: }
358: }
359: }
360: ISCreateGeneral(PETSC_COMM_SELF, ii, indexg, PETSC_COPY_VALUES, &submatIS);
361: MatCreateSubMatrix(glldata.elem_mat, submatIS, submatIS, MAT_INITIAL_MATRIX, &elem_mat_DBC);
362: ISDestroy(&submatIS);
363: }
365: /* Assemble subdomain matrix */
366: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
367: MatCreate(PETSC_COMM_SELF, &temp_local_mat);
368: MatSetSizes(temp_local_mat, localsize, localsize, localsize, localsize);
369: MatSetOptionsPrefix(temp_local_mat, "subdomain_");
370: /* set local matrices type: here we use SEQSBAIJ primarily for testing purposes */
371: /* in order to avoid conversions inside the BDDC code, use SeqAIJ if possible */
372: if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
373: MatSetType(temp_local_mat, MATSEQAIJ);
374: } else {
375: MatSetType(temp_local_mat, MATSEQSBAIJ);
376: }
377: MatSetFromOptions(temp_local_mat);
379: i = PetscPowRealInt(3.0 * (dd.p + 1.0), dd.dim);
381: MatSeqAIJSetPreallocation(temp_local_mat, i, NULL); /* very overestimated */
382: MatSeqSBAIJSetPreallocation(temp_local_mat, 1, i, NULL); /* very overestimated */
383: MatSeqBAIJSetPreallocation(temp_local_mat, 1, i, NULL); /* very overestimated */
384: MatSetOption(temp_local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE);
386: yloc = dd.p + 1;
387: zloc = dd.p + 1;
388: if (dd.dim < 3) zloc = 1;
389: if (dd.dim < 2) yloc = 1;
391: auxnez = dd.nez_l;
392: auxney = dd.ney_l;
393: auxnex = dd.nex_l;
394: if (dd.dim < 3) auxnez = 1;
395: if (dd.dim < 2) auxney = 1;
397: for (ke = 0; ke < auxnez; ke++) {
398: for (je = 0; je < auxney; je++) {
399: for (ie = 0; ie < auxnex; ie++) {
400: /* customize element accounting for BC */
401: xloc = dd.p + 1;
402: ming = 0;
403: usedmat = &glldata.elem_mat;
404: if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
405: if (ie == 0) {
406: xloc = dd.p;
407: usedmat = &elem_mat_DBC;
408: } else {
409: ming = -1;
410: usedmat = &glldata.elem_mat;
411: }
412: }
413: /* local to the element/global to the subdomain indexing */
414: for (k = 0; k < zloc; k++) {
415: kg = ke * dd.p + k;
416: for (j = 0; j < yloc; j++) {
417: jg = je * dd.p + j;
418: for (i = 0; i < xloc; i++) {
419: ig = ie * dd.p + i + ming;
420: ii = k * xloc * yloc + j * xloc + i;
421: indexg[ii] = kg * dd.xm_l * dd.ym_l + jg * dd.xm_l + ig;
422: }
423: }
424: }
425: /* Set values */
426: for (i = 0; i < xloc * yloc * zloc; i++) {
427: MatGetRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals);
428: for (k = 0; k < j; k++) colsg[k] = indexg[cols[k]];
429: MatSetValues(temp_local_mat, 1, &indexg[i], j, colsg, vals, ADD_VALUES);
430: MatRestoreRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals);
431: }
432: }
433: }
434: }
435: PetscFree(indexg);
436: PetscFree(colsg);
437: MatAssemblyBegin(temp_local_mat, MAT_FINAL_ASSEMBLY);
438: MatAssemblyEnd(temp_local_mat, MAT_FINAL_ASSEMBLY);
439: #if DEBUG
440: {
441: Vec lvec, rvec;
442: PetscReal norm;
443: MatCreateVecs(temp_local_mat, &lvec, &rvec);
444: VecSet(lvec, 1.0);
445: MatMult(temp_local_mat, lvec, rvec);
446: VecNorm(rvec, NORM_INFINITY, &norm);
447: VecDestroy(&lvec);
448: VecDestroy(&rvec);
449: }
450: #endif
451: *local_mat = temp_local_mat;
452: MatDestroy(&elem_mat_DBC);
453: return 0;
454: }
456: static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
457: {
458: PetscReal *M, si;
459: PetscScalar x, z0, z1, z2, Lpj, Lpr, rhoGLj, rhoGLk;
460: PetscBLASInt pm1, lierr;
461: PetscInt i, j, n, k, s, r, q, ii, jj, p = dd.p;
462: PetscInt xloc, yloc, zloc, xyloc, xyzloc;
465: /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
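/* the interior nodes are obtained as the eigenvalues of a symmetric tridiagonal matrix,
   computed below with the LAPACK routine steqr */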
466: PetscCalloc1(p + 1, &glldata->zGL);
468: glldata->zGL[0] = -1.0;
469: glldata->zGL[p] = 1.0;
470: if (p > 1) {
471: if (p == 2) glldata->zGL[1] = 0.0;
472: else {
473: PetscMalloc1(p - 1, &M);
474: for (i = 0; i < p - 1; i++) {
475: si = (PetscReal)(i + 1.0);
476: M[i] = 0.5 * PetscSqrtReal(si * (si + 2.0) / ((si + 0.5) * (si + 1.5)));
477: }
478: pm1 = p - 1;
479: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
480: PetscCallBLAS("LAPACKsteqr", LAPACKsteqr_("N", &pm1, &glldata->zGL[1], M, &x, &pm1, M, &lierr));
482: PetscFPTrapPop();
483: PetscFree(M);
484: }
485: }
487: /* Weights for 1D quadrature */
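/* rho_i = 2 / (p*(p+1)*[P_p(z_i)]^2), with the Legendre polynomial P_p evaluated through the
   three-term recurrence (n+1)*P_{n+1}(x) = (2*n+1)*x*P_n(x) - n*P_{n-1}(x) */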
488: PetscMalloc1(p + 1, &glldata->rhoGL);
490: glldata->rhoGL[0] = 2.0 / (PetscScalar)(p * (p + 1.0));
491: glldata->rhoGL[p] = glldata->rhoGL[0];
492: z2 = -1; /* Dummy value to avoid -Wmaybe-uninitialized */
493: for (i = 1; i < p; i++) {
494: x = glldata->zGL[i];
495: z0 = 1.0;
496: z1 = x;
497: for (n = 1; n < p; n++) {
498: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
499: z0 = z1;
500: z1 = z2;
501: }
502: glldata->rhoGL[i] = 2.0 / (p * (p + 1.0) * z2 * z2);
503: }
505: /* Auxiliary mat for laplacian */
506: PetscMalloc1(p + 1, &glldata->A);
507: PetscMalloc1((p + 1) * (p + 1), &glldata->A[0]);
508: for (i = 1; i < p + 1; i++) glldata->A[i] = glldata->A[i - 1] + p + 1;
510: for (j = 1; j < p; j++) {
511: x = glldata->zGL[j];
512: z0 = 1.0;
513: z1 = x;
514: for (n = 1; n < p; n++) {
515: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
516: z0 = z1;
517: z1 = z2;
518: }
519: Lpj = z2;
520: for (r = 1; r < p; r++) {
521: if (r == j) {
522: glldata->A[j][j] = 2.0 / (3.0 * (1.0 - glldata->zGL[j] * glldata->zGL[j]) * Lpj * Lpj);
523: } else {
524: x = glldata->zGL[r];
525: z0 = 1.0;
526: z1 = x;
527: for (n = 1; n < p; n++) {
528: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
529: z0 = z1;
530: z1 = z2;
531: }
532: Lpr = z2;
533: glldata->A[r][j] = 4.0 / (p * (p + 1.0) * Lpj * Lpr * (glldata->zGL[j] - glldata->zGL[r]) * (glldata->zGL[j] - glldata->zGL[r]));
534: }
535: }
536: }
537: for (j = 1; j < p + 1; j++) {
538: x = glldata->zGL[j];
539: z0 = 1.0;
540: z1 = x;
541: for (n = 1; n < p; n++) {
542: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
543: z0 = z1;
544: z1 = z2;
545: }
546: Lpj = z2;
547: glldata->A[j][0] = 4.0 * PetscPowRealInt(-1.0, p) / (p * (p + 1.0) * Lpj * (1.0 + glldata->zGL[j]) * (1.0 + glldata->zGL[j]));
548: glldata->A[0][j] = glldata->A[j][0];
549: }
550: for (j = 0; j < p; j++) {
551: x = glldata->zGL[j];
552: z0 = 1.0;
553: z1 = x;
554: for (n = 1; n < p; n++) {
555: z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
556: z0 = z1;
557: z1 = z2;
558: }
559: Lpj = z2;
561: glldata->A[p][j] = 4.0 / (p * (p + 1.0) * Lpj * (1.0 - glldata->zGL[j]) * (1.0 - glldata->zGL[j]));
562: glldata->A[j][p] = glldata->A[p][j];
563: }
564: glldata->A[0][0] = 0.5 + (p * (p + 1.0) - 2.0) / 6.0;
565: glldata->A[p][p] = glldata->A[0][0];
567: /* compute element matrix */
568: xloc = p + 1;
569: yloc = p + 1;
570: zloc = p + 1;
571: if (dd.dim < 2) yloc = 1;
572: if (dd.dim < 3) zloc = 1;
573: xyloc = xloc * yloc;
574: xyzloc = xloc * yloc * zloc;
576: MatCreate(PETSC_COMM_SELF, &glldata->elem_mat);
577: MatSetSizes(glldata->elem_mat, xyzloc, xyzloc, xyzloc, xyzloc);
578: MatSetType(glldata->elem_mat, MATSEQAIJ);
579: MatSeqAIJSetPreallocation(glldata->elem_mat, xyzloc, NULL); /* overestimated */
580: MatZeroEntries(glldata->elem_mat);
581: MatSetOption(glldata->elem_mat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE);
583: for (k = 0; k < zloc; k++) {
584: if (dd.dim > 2) rhoGLk = glldata->rhoGL[k];
585: else rhoGLk = 1.0;
587: for (j = 0; j < yloc; j++) {
588: if (dd.dim > 1) rhoGLj = glldata->rhoGL[j];
589: else rhoGLj = 1.0;
591: for (i = 0; i < xloc; i++) {
592: ii = k * xyloc + j * xloc + i;
593: s = k;
594: r = j;
595: for (q = 0; q < xloc; q++) {
596: jj = s * xyloc + r * xloc + q;
597: MatSetValue(glldata->elem_mat, jj, ii, glldata->A[i][q] * rhoGLj * rhoGLk, ADD_VALUES);
598: }
599: if (dd.dim > 1) {
600: s = k;
601: q = i;
602: for (r = 0; r < yloc; r++) {
603: jj = s * xyloc + r * xloc + q;
604: MatSetValue(glldata->elem_mat, jj, ii, glldata->A[j][r] * glldata->rhoGL[i] * rhoGLk, ADD_VALUES);
605: }
606: }
607: if (dd.dim > 2) {
608: r = j;
609: q = i;
610: for (s = 0; s < zloc; s++) {
611: jj = s * xyloc + r * xloc + q;
612: MatSetValue(glldata->elem_mat, jj, ii, glldata->A[k][s] * rhoGLj * glldata->rhoGL[i], ADD_VALUES);
613: }
614: }
615: }
616: }
617: }
618: MatAssemblyBegin(glldata->elem_mat, MAT_FINAL_ASSEMBLY);
619: MatAssemblyEnd(glldata->elem_mat, MAT_FINAL_ASSEMBLY);
620: #if DEBUG
621: {
622: Vec lvec, rvec;
623: PetscReal norm;
624: MatCreateVecs(glldata->elem_mat, &lvec, &rvec);
625: VecSet(lvec, 1.0);
626: MatMult(glldata->elem_mat, lvec, rvec);
627: VecNorm(rvec, NORM_INFINITY, &norm);
628: VecDestroy(&lvec);
629: VecDestroy(&rvec);
630: }
631: #endif
632: return 0;
633: }
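/* Computes the Cartesian subdomain indices, the local number of elements, the local and global
   numbers of dofs, and the starting global indices of the subdomain in the lexicographic ordering */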
635: static PetscErrorCode DomainDecomposition(DomainData *dd)
636: {
637: PetscMPIInt rank;
638: PetscInt i, j, k;
641: /* Subdomain index in Cartesian coordinates */
642: MPI_Comm_rank(dd->gcomm, &rank);
643: dd->ipx = rank % dd->npx;
644: if (dd->dim > 1) dd->ipz = rank / (dd->npx * dd->npy);
645: else dd->ipz = 0;
647: dd->ipy = rank / dd->npx - dd->ipz * dd->npy;
648: /* number of local elements */
649: dd->nex_l = dd->nex / dd->npx;
650: if (dd->ipx < dd->nex % dd->npx) dd->nex_l++;
651: if (dd->dim > 1) {
652: dd->ney_l = dd->ney / dd->npy;
653: if (dd->ipy < dd->ney % dd->npy) dd->ney_l++;
654: } else {
655: dd->ney_l = 0;
656: }
657: if (dd->dim > 2) {
658: dd->nez_l = dd->nez / dd->npz;
659: if (dd->ipz < dd->nez % dd->npz) dd->nez_l++;
660: } else {
661: dd->nez_l = 0;
662: }
663: /* local and global number of dofs */
664: dd->xm_l = dd->nex_l * dd->p + 1;
665: dd->xm = dd->nex * dd->p + 1;
666: dd->ym_l = dd->ney_l * dd->p + 1;
667: dd->ym = dd->ney * dd->p + 1;
668: dd->zm_l = dd->nez_l * dd->p + 1;
669: dd->zm = dd->nez * dd->p + 1;
670: if (!dd->pure_neumann) {
671: if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
672: if (!dd->DBC_zerorows) dd->xm--;
673: }
675: /* starting global index for local dofs (simple lexicographic order) */
676: dd->startx = 0;
677: j = dd->nex / dd->npx;
678: for (i = 0; i < dd->ipx; i++) {
679: k = j;
680: if (i < dd->nex % dd->npx) k++;
681: dd->startx = dd->startx + k * dd->p;
682: }
683: if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;
685: dd->starty = 0;
686: if (dd->dim > 1) {
687: j = dd->ney / dd->npy;
688: for (i = 0; i < dd->ipy; i++) {
689: k = j;
690: if (i < dd->ney % dd->npy) k++;
691: dd->starty = dd->starty + k * dd->p;
692: }
693: }
694: dd->startz = 0;
695: if (dd->dim > 2) {
696: j = dd->nez / dd->npz;
697: for (i = 0; i < dd->ipz; i++) {
698: k = j;
699: if (i < dd->nez % dd->npz) k++;
700: dd->startz = dd->startz + k * dd->p;
701: }
702: }
703: return 0;
704: }
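/* Assembles the global operator in MATIS format: each process sets its local Neumann matrix through
   the local-to-global mapping, which is the unassembled format PCBDDC and FETI-DP operate on; with
   -usezerorows, the Dirichlet rows and columns of the assembled operator are then zeroed out */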
705: static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
706: {
707: GLLData gll;
708: Mat local_mat = 0, temp_A = 0;
709: ISLocalToGlobalMapping matis_map = 0;
710: IS dirichletIS = 0;
713: /* Compute Gauss-Lobatto-Legendre quadrature data */
714: GLLStuffs(dd, &gll);
715: /* Compute matrix of subdomain Neumann problem */
716: ComputeSubdomainMatrix(dd, gll, &local_mat);
717: /* Compute global mapping of local dofs */
718: ComputeMapping(dd, &matis_map);
719: /* Create MATIS object needed by BDDC */
720: MatCreateIS(dd.gcomm, 1, PETSC_DECIDE, PETSC_DECIDE, dd.xm * dd.ym * dd.zm, dd.xm * dd.ym * dd.zm, matis_map, matis_map, &temp_A);
721: /* Set local subdomain matrices into MATIS object */
722: MatScale(local_mat, dd.scalingfactor);
723: MatISSetLocalMat(temp_A, local_mat);
724: /* Call assembly functions */
725: MatAssemblyBegin(temp_A, MAT_FINAL_ASSEMBLY);
726: MatAssemblyEnd(temp_A, MAT_FINAL_ASSEMBLY);
728: if (dd.DBC_zerorows) {
729: PetscInt dirsize;
731: ComputeSpecialBoundaryIndices(dd, &dirichletIS, NULL);
732: MatSetOption(local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE);
733: MatZeroRowsColumnsLocalIS(temp_A, dirichletIS, 1.0, NULL, NULL);
734: ISGetLocalSize(dirichletIS, &dirsize);
735: ISDestroy(&dirichletIS);
736: }
738: /* giving hints about the properties of the local matrix can be useful to the BDDC preconditioner */
739: MatSetOption(local_mat, MAT_SPD, PETSC_TRUE);
740: MatSetOption(local_mat, MAT_SPD_ETERNAL, PETSC_TRUE);
741: #if DEBUG
742: {
743: Vec lvec, rvec;
744: PetscReal norm;
745: MatCreateVecs(temp_A, &lvec, &rvec);
746: VecSet(lvec, 1.0);
747: MatMult(temp_A, lvec, rvec);
748: VecNorm(rvec, NORM_INFINITY, &norm);
749: VecDestroy(&lvec);
750: VecDestroy(&rvec);
751: }
752: #endif
753: /* free allocated workspace */
754: PetscFree(gll.zGL);
755: PetscFree(gll.rhoGL);
756: PetscFree(gll.A[0]);
757: PetscFree(gll.A);
758: MatDestroy(&gll.elem_mat);
759: MatDestroy(&local_mat);
760: ISLocalToGlobalMappingDestroy(&matis_map);
761: /* give back the pointer to the MATIS object */
762: *A = temp_A;
763: return 0;
764: }
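/* Creates the FETI-DP solver in one of two ways: explicitly, via PCBDDCCreateFETIDPOperators (a KSP
   on the space of Lagrange multipliers with operator F and its preconditioner D), or through
   KSPFETIDP, reusing the already customized BDDC as inner preconditioner (-testkspfetidp, the default) */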
766: static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
767: {
768: KSP temp_ksp;
769: PC pc;
772: KSPGetPC(ksp_bddc, &pc);
773: if (!dd.testkspfetidp) {
774: PC D;
775: Mat F;
777: PCBDDCCreateFETIDPOperators(pc, PETSC_TRUE, NULL, &F, &D);
778: KSPCreate(PetscObjectComm((PetscObject)F), &temp_ksp);
779: KSPSetOperators(temp_ksp, F, F);
780: KSPSetType(temp_ksp, KSPCG);
781: KSPSetPC(temp_ksp, D);
782: KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE);
783: KSPSetOptionsPrefix(temp_ksp, "fluxes_");
784: KSPSetFromOptions(temp_ksp);
785: KSPSetUp(temp_ksp);
786: MatDestroy(&F);
787: PCDestroy(&D);
788: } else {
789: Mat A, Ap;
791: KSPCreate(PetscObjectComm((PetscObject)ksp_bddc), &temp_ksp);
792: KSPGetOperators(ksp_bddc, &A, &Ap);
793: KSPSetOperators(temp_ksp, A, Ap);
794: KSPSetOptionsPrefix(temp_ksp, "fluxes_");
795: KSPSetType(temp_ksp, KSPFETIDP);
796: KSPFETIDPSetInnerBDDC(temp_ksp, pc);
797: KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE);
798: KSPSetFromOptions(temp_ksp);
799: KSPSetUp(temp_ksp);
800: }
801: *ksp_fetidp = temp_ksp;
802: return 0;
803: }
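/* Creates the KSP for the MATIS operator and customizes its PCBDDC preconditioner: subdomain scaling,
   dof splitting, a near-null space attached to A, the local adjacency graph, user-defined primal
   vertices, and the Dirichlet/Neumann boundary index sets */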
805: static PetscErrorCode ComputeKSPBDDC(DomainData dd, Mat A, KSP *ksp)
806: {
807: KSP temp_ksp;
808: PC pc;
809: IS primals, dirichletIS = 0, neumannIS = 0, *bddc_dofs_splitting;
810: PetscInt vidx[8], localsize, *xadj = NULL, *adjncy = NULL;
811: MatNullSpace near_null_space;
814: KSPCreate(dd.gcomm, &temp_ksp);
815: KSPSetOperators(temp_ksp, A, A);
816: KSPSetType(temp_ksp, KSPCG);
817: KSPGetPC(temp_ksp, &pc);
818: PCSetType(pc, PCBDDC);
820: localsize = dd.xm_l * dd.ym_l * dd.zm_l;
822: /* BDDC customization */
824: /* jumping coefficients case */
825: PCISSetSubdomainScalingFactor(pc, dd.scalingfactor);
827: /* Dofs splitting
828: Simple stride-1 IS
829: It is not needed since, by default, PCBDDC assumes a stride-1 split */
830: PetscMalloc1(1, &bddc_dofs_splitting);
831: #if 1
832: ISCreateStride(PETSC_COMM_WORLD, localsize, 0, 1, &bddc_dofs_splitting[0]);
833: PCBDDCSetDofsSplittingLocal(pc, 1, bddc_dofs_splitting);
834: #else
835: /* examples for global ordering */
837: /* each process lists the nodes it owns */
838: PetscInt sr, er;
839: MatGetOwnershipRange(A, &sr, &er);
840: ISCreateStride(PETSC_COMM_WORLD, er - sr, sr, 1, &bddc_dofs_splitting[0]);
841: PCBDDCSetDofsSplitting(pc, 1, bddc_dofs_splitting);
842: /* Split can be passed in a more general way since any process can list any node */
843: #endif
844: ISDestroy(&bddc_dofs_splitting[0]);
845: PetscFree(bddc_dofs_splitting);
847: /* Primal constraints implemented by using a near-null space attached to A -> here it passes in only the constants
848: (which in practice is not needed, since by default PCBDDC builds the primal space using constants for the quadrature formulas) */
849: #if 0
850: Vec vecs[2];
851: PetscRandom rctx;
852: MatCreateVecs(A,&vecs[0],&vecs[1]);
853: PetscRandomCreate(dd.gcomm,&rctx);
854: VecSetRandom(vecs[0],rctx);
855: VecSetRandom(vecs[1],rctx);
856: MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space);
857: VecDestroy(&vecs[0]);
858: VecDestroy(&vecs[1]);
859: PetscRandomDestroy(&rctx);
860: #else
861: MatNullSpaceCreate(dd.gcomm, PETSC_TRUE, 0, NULL, &near_null_space);
862: #endif
863: MatSetNearNullSpace(A, near_null_space);
864: MatNullSpaceDestroy(&near_null_space);
866: /* CSR graph of subdomain dofs */
867: BuildCSRGraph(dd, &xadj, &adjncy);
868: PCBDDCSetLocalAdjacencyGraph(pc, localsize, xadj, adjncy, PETSC_OWN_POINTER);
870: /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
871: vidx[0] = 0 * dd.xm_l + 0;
872: vidx[1] = 0 * dd.xm_l + dd.xm_l - 1;
873: vidx[2] = (dd.ym_l - 1) * dd.xm_l + 0;
874: vidx[3] = (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
875: vidx[4] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + 0;
876: vidx[5] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + dd.xm_l - 1;
877: vidx[6] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + 0;
878: vidx[7] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
879: ISCreateGeneral(dd.gcomm, 8, vidx, PETSC_COPY_VALUES, &primals);
880: PCBDDCSetPrimalVerticesLocalIS(pc, primals);
881: ISDestroy(&primals);
883: /* Neumann/Dirichlet indices on the global boundary */
884: if (dd.DBC_zerorows) {
885: /* Only when some matrix rows have been eliminated with the zero-rows function do you need to pass Dirichlet indices to PCBDDC */
886: ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS);
887: PCBDDCSetNeumannBoundariesLocal(pc, neumannIS);
888: PCBDDCSetDirichletBoundariesLocal(pc, dirichletIS);
889: } else {
890: if (dd.pure_neumann) {
891: /* In such a case, all interface nodes lying on the global boundary are Neumann nodes */
892: ComputeSpecialBoundaryIndices(dd, NULL, &neumannIS);
893: PCBDDCSetNeumannBoundariesLocal(pc, neumannIS);
894: } else {
895: /* It is wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
896: /* But we can still compute them, since nodes near the Dirichlet boundary do not need to be defined as Neumann nodes */
897: ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS);
898: PCBDDCSetNeumannBoundariesLocal(pc, neumannIS);
899: }
900: }
902: /* Pass local null space information to local matrices (needed when using approximate local solvers) */
903: if (dd.ipx || dd.pure_neumann) {
904: MatNullSpace nsp;
905: Mat local_mat;
907: MatISGetLocalMat(A, &local_mat);
908: MatNullSpaceCreate(PETSC_COMM_SELF, PETSC_TRUE, 0, NULL, &nsp);
909: MatSetNullSpace(local_mat, nsp);
910: MatNullSpaceDestroy(&nsp);
911: }
912: KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE);
913: KSPSetOptionsPrefix(temp_ksp, "physical_");
914: KSPSetFromOptions(temp_ksp);
915: KSPSetUp(temp_ksp);
916: *ksp = temp_ksp;
917: ISDestroy(&dirichletIS);
918: ISDestroy(&neumannIS);
919: return 0;
920: }
922: static PetscErrorCode InitializeDomainData(DomainData *dd)
923: {
924: PetscMPIInt sizes, rank;
925: PetscInt factor;
928: dd->gcomm = PETSC_COMM_WORLD;
929: MPI_Comm_size(dd->gcomm, &sizes);
930: MPI_Comm_rank(dd->gcomm, &rank);
931: /* Get information from command line */
932: /* Processors/subdomains per dimension */
933: /* Default is 1d problem */
934: dd->npx = sizes;
935: dd->npy = 0;
936: dd->npz = 0;
937: dd->dim = 1;
938: PetscOptionsGetInt(NULL, NULL, "-npx", &dd->npx, NULL);
939: PetscOptionsGetInt(NULL, NULL, "-npy", &dd->npy, NULL);
940: PetscOptionsGetInt(NULL, NULL, "-npz", &dd->npz, NULL);
941: if (dd->npy) dd->dim++;
942: if (dd->npz) dd->dim++;
943: /* Number of elements per dimension */
944: /* Default is one element per subdomain */
945: dd->nex = dd->npx;
946: dd->ney = dd->npy;
947: dd->nez = dd->npz;
948: PetscOptionsGetInt(NULL, NULL, "-nex", &dd->nex, NULL);
949: PetscOptionsGetInt(NULL, NULL, "-ney", &dd->ney, NULL);
950: PetscOptionsGetInt(NULL, NULL, "-nez", &dd->nez, NULL);
951: if (!dd->npy) {
952: dd->ney = 0;
953: dd->nez = 0;
954: }
955: if (!dd->npz) dd->nez = 0;
956: /* Spectral degree */
957: dd->p = 3;
958: PetscOptionsGetInt(NULL, NULL, "-p", &dd->p, NULL);
959: /* pure Neumann problem? */
960: dd->pure_neumann = PETSC_FALSE;
961: PetscOptionsGetBool(NULL, NULL, "-pureneumann", &dd->pure_neumann, NULL);
963: /* How to enforce Dirichlet boundary conditions (in case -pureneumann has not been requested explicitly) */
964: dd->DBC_zerorows = PETSC_FALSE;
966: PetscOptionsGetBool(NULL, NULL, "-usezerorows", &dd->DBC_zerorows, NULL);
967: if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
968: dd->scalingfactor = 1.0;
970: factor = 0;
971: PetscOptionsGetInt(NULL, NULL, "-jump", &factor, NULL);
972: /* checkerboard pattern */
973: dd->scalingfactor = PetscPowScalar(10.0, (PetscScalar)factor * PetscPowScalar(-1.0, (PetscScalar)rank));
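/* even ranks scale their subdomain matrix by 10^jump and odd ranks by 10^(-jump), producing a
   checkerboard-like coefficient jump between neighbouring subdomains */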
974: /* sanity checks on the data passed in */
975: if (dd->dim == 1) {
976: PetscCheck(sizes == dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of MPI processes in 1D must equal npx");
978: } else if (dd->dim == 2) {
979: PetscCheck(sizes == dd->npx * dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of MPI processes in 2D must equal npx*npy");
981: } else {
982: PetscCheck(sizes == dd->npx * dd->npy * dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of MPI processes in 3D must equal npx*npy*npz");
984: }
985: return 0;
986: }
988: int main(int argc, char **args)
989: {
990: DomainData dd;
991: PetscReal norm, maxeig, mineig;
992: PetscScalar scalar_value;
993: PetscInt ndofs, its;
994: Mat A = NULL, F = NULL;
995: KSP KSPwithBDDC = NULL, KSPwithFETIDP = NULL;
996: KSPConvergedReason reason;
997: Vec exact_solution = NULL, bddc_solution = NULL, bddc_rhs = NULL;
998: PetscBool testfetidp = PETSC_TRUE;
1000: /* Init PETSc */
1002: PetscInitialize(&argc, &args, (char *)0, help);
1003: /* Initialize DomainData */
1004: InitializeDomainData(&dd);
1005: /* Decompose domain */
1006: DomainDecomposition(&dd);
1007: #if DEBUG
1008: printf("Subdomain data\n");
1009: printf("IPS : %d %d %d\n", dd.ipx, dd.ipy, dd.ipz);
1010: printf("NEG : %d %d %d\n", dd.nex, dd.ney, dd.nez);
1011: printf("NEL : %d %d %d\n", dd.nex_l, dd.ney_l, dd.nez_l);
1012: printf("LDO : %d %d %d\n", dd.xm_l, dd.ym_l, dd.zm_l);
1013: printf("SIZES : %d %d %d\n", dd.xm, dd.ym, dd.zm);
1014: printf("STARTS: %d %d %d\n", dd.startx, dd.starty, dd.startz);
1015: #endif
1016: dd.testkspfetidp = PETSC_TRUE;
1017: PetscOptionsGetBool(NULL, NULL, "-testfetidp", &testfetidp, NULL);
1018: PetscOptionsGetBool(NULL, NULL, "-testkspfetidp", &dd.testkspfetidp, NULL);
1019: /* assemble global matrix */
1020: ComputeMatrix(dd, &A);
1021: /* get work vectors */
1022: MatCreateVecs(A, &bddc_solution, NULL);
1023: VecDuplicate(bddc_solution, &bddc_rhs);
1024: VecDuplicate(bddc_solution, &exact_solution);
1025: /* create and customize KSP/PC for BDDC */
1026: ComputeKSPBDDC(dd, A, &KSPwithBDDC);
1027: /* create KSP/PC for FETIDP */
1028: if (testfetidp) ComputeKSPFETIDP(dd, KSPwithBDDC, &KSPwithFETIDP);
1029: /* create random exact solution */
1030: #if defined(PETSC_USE_COMPLEX)
1031: VecSet(exact_solution, 1.0 + PETSC_i);
1032: #else
1033: VecSetRandom(exact_solution, NULL);
1034: #endif
1035: VecShift(exact_solution, -0.5);
1036: VecScale(exact_solution, 100.0);
1037: VecGetSize(exact_solution, &ndofs);
1038: if (dd.pure_neumann) {
1039: VecSum(exact_solution, &scalar_value);
1040: scalar_value = -scalar_value / (PetscScalar)ndofs;
1041: VecShift(exact_solution, scalar_value);
1042: }
1043: /* assemble BDDC rhs */
1044: MatMult(A, exact_solution, bddc_rhs);
1045: /* test ksp with BDDC */
1046: KSPSolve(KSPwithBDDC, bddc_rhs, bddc_solution);
1047: KSPGetIterationNumber(KSPwithBDDC, &its);
1048: KSPGetConvergedReason(KSPwithBDDC, &reason);
1049: KSPComputeExtremeSingularValues(KSPwithBDDC, &maxeig, &mineig);
1050: if (dd.pure_neumann) {
1051: VecSum(bddc_solution, &scalar_value);
1052: scalar_value = -scalar_value / (PetscScalar)ndofs;
1053: VecShift(bddc_solution, scalar_value);
1054: }
1055: /* check exact_solution and BDDC solution */
1056: VecAXPY(bddc_solution, -1.0, exact_solution);
1057: VecNorm(bddc_solution, NORM_INFINITY, &norm);
1058: PetscPrintf(dd.gcomm, "---------------------BDDC stats-------------------------------\n");
1059: PetscPrintf(dd.gcomm, "Number of degrees of freedom : %8" PetscInt_FMT "\n", ndofs);
1060: if (reason < 0) {
1061: PetscPrintf(dd.gcomm, "Number of iterations : %8" PetscInt_FMT "\n", its);
1062: PetscPrintf(dd.gcomm, "Converged reason : %s\n", KSPConvergedReasons[reason]);
1063: }
1064: if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1065: PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.);
1066: if (norm > 1.e-1 || reason < 0) PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm);
1067: PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n");
1068: if (testfetidp) {
1069: Vec fetidp_solution_all = NULL, fetidp_solution = NULL, fetidp_rhs = NULL;
1071: VecDuplicate(bddc_solution, &fetidp_solution_all);
1072: if (!dd.testkspfetidp) {
1073: /* assemble fetidp rhs on the space of Lagrange multipliers */
1074: KSPGetOperators(KSPwithFETIDP, &F, NULL);
1075: MatCreateVecs(F, &fetidp_solution, &fetidp_rhs);
1076: PCBDDCMatFETIDPGetRHS(F, bddc_rhs, fetidp_rhs);
1077: VecSet(fetidp_solution, 0.0);
1078: /* test ksp with FETIDP */
1079: KSPSolve(KSPwithFETIDP, fetidp_rhs, fetidp_solution);
1080: /* assemble fetidp solution on physical domain */
1081: PCBDDCMatFETIDPGetSolution(F, fetidp_solution, fetidp_solution_all);
1082: } else {
1083: KSP kspF;
1084: KSPSolve(KSPwithFETIDP, bddc_rhs, fetidp_solution_all);
1085: KSPFETIDPGetInnerKSP(KSPwithFETIDP, &kspF);
1086: KSPGetOperators(kspF, &F, NULL);
1087: }
1088: MatGetSize(F, &ndofs, NULL);
1089: KSPGetIterationNumber(KSPwithFETIDP, &its);
1090: KSPGetConvergedReason(KSPwithFETIDP, &reason);
1091: KSPComputeExtremeSingularValues(KSPwithFETIDP, &maxeig, &mineig);
1092: /* check FETIDP sol */
1093: if (dd.pure_neumann) {
1094: VecSum(fetidp_solution_all, &scalar_value);
1095: scalar_value = -scalar_value / (PetscScalar)ndofs;
1096: VecShift(fetidp_solution_all, scalar_value);
1097: }
1098: VecAXPY(fetidp_solution_all, -1.0, exact_solution);
1099: VecNorm(fetidp_solution_all, NORM_INFINITY, &norm);
1100: PetscPrintf(dd.gcomm, "------------------FETI-DP stats-------------------------------\n");
1101: PetscPrintf(dd.gcomm, "Number of degrees of freedom : %8" PetscInt_FMT "\n", ndofs);
1102: if (reason < 0) {
1103: PetscPrintf(dd.gcomm, "Number of iterations : %8" PetscInt_FMT "\n", its);
1104: PetscPrintf(dd.gcomm, "Converged reason : %s\n", KSPConvergedReasons[reason]);
1105: }
1106: if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1107: PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.);
1108: if (norm > 1.e-1 || reason < 0) PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm);
1109: PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n");
1110: VecDestroy(&fetidp_solution);
1111: VecDestroy(&fetidp_solution_all);
1112: VecDestroy(&fetidp_rhs);
1113: }
1114: KSPDestroy(&KSPwithFETIDP);
1115: VecDestroy(&exact_solution);
1116: VecDestroy(&bddc_solution);
1117: VecDestroy(&bddc_rhs);
1118: MatDestroy(&A);
1119: KSPDestroy(&KSPwithBDDC);
1120: /* Quit PETSc */
1121: PetscFinalize();
1122: return 0;
1123: }
1125: /*TEST

1127:   testset:
1128:     nsize: 4
1129:     args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1130:     output_file: output/ex59_bddc_fetidp_1.out
1131:     test:
1132:       suffix: bddc_fetidp_1
1133:     test:
1134:       requires: viennacl
1135:       suffix: bddc_fetidp_1_viennacl
1136:       args: -subdomain_mat_type aijviennacl
1137:     test:
1138:       requires: cuda
1139:       suffix: bddc_fetidp_1_cuda
1140:       args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse

1142:   testset:
1143:     nsize: 4
1144:     args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1145:     requires: !single
1146:     test:
1147:       suffix: bddc_fetidp_2
1148:     test:
1149:       suffix: bddc_fetidp_3
1150:       args: -npz 1 -nez 1
1151:     test:
1152:       suffix: bddc_fetidp_4
1153:       args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg

1155:   testset:
1156:     nsize: 8
1157:     suffix: bddc_fetidp_approximate
1158:     args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_type cg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_max_it 10 -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0

1160:   testset:
1161:     nsize: 4
1162:     args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1163:     filter: grep -v "variant HERMITIAN"
1164:     requires: !single
1165:     test:
1166:       suffix: bddc_fetidp_ml_1
1167:       args: -physical_pc_bddc_coarsening_ratio 1
1168:     test:
1169:       suffix: bddc_fetidp_ml_2
1170:       args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1171:     test:
1172:       suffix: bddc_fetidp_ml_3
1173:       args: -physical_pc_bddc_coarsening_ratio 4

1175:   testset:
1176:     nsize: 9
1177:     args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1178:     output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1179:     test:
1180:       suffix: bddc_fetidp_ml_eqlimit_1
1181:       args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1182:     test:
1183:       suffix: bddc_fetidp_ml_eqlimit_2
1184:       args: -physical_pc_bddc_coarse_eqs_limit 46

1186: TEST*/