Actual source code: telescope_dmda.c
2: #include <petsc/private/matimpl.h>
3: #include <petsc/private/pcimpl.h>
4: #include <petsc/private/dmimpl.h>
5: #include <petscksp.h>
6: #include <petscdm.h>
7: #include <petscdmda.h>
9: #include "../src/ksp/pc/impls/telescope/telescope.h"
11: static PetscBool cited = PETSC_FALSE;
12: static const char citation[] = "@inproceedings{MaySananRuppKnepleySmith2016,\n"
13: " title = {Extreme-Scale Multigrid Components within PETSc},\n"
14: " author = {Dave A. May and Patrick Sanan and Karl Rupp and Matthew G. Knepley and Barry F. Smith},\n"
15: " booktitle = {Proceedings of the Platform for Advanced Scientific Computing Conference},\n"
16: " series = {PASC '16},\n"
17: " isbn = {978-1-4503-4126-4},\n"
18: " location = {Lausanne, Switzerland},\n"
19: " pages = {5:1--5:12},\n"
20: " articleno = {5},\n"
21: " numpages = {12},\n"
22: " url = {https://doi.acm.org/10.1145/2929908.2929913},\n"
23: " doi = {10.1145/2929908.2929913},\n"
24: " acmid = {2929913},\n"
25: " publisher = {ACM},\n"
26: " address = {New York, NY, USA},\n"
27: " keywords = {GPU, HPC, agglomeration, coarse-level solver, multigrid, parallel computing, preconditioning},\n"
28: " year = {2016}\n"
29: "}\n";
31: static PetscErrorCode _DMDADetermineRankFromGlobalIJK(PetscInt dim, PetscInt i, PetscInt j, PetscInt k, PetscInt Mp, PetscInt Np, PetscInt Pp, PetscInt start_i[], PetscInt start_j[], PetscInt start_k[], PetscInt span_i[], PetscInt span_j[], PetscInt span_k[], PetscMPIInt *_pi, PetscMPIInt *_pj, PetscMPIInt *_pk, PetscMPIInt *rank_re)
32: {
33: PetscInt pi, pj, pk, n;
35: *rank_re = -1;
36: if (_pi) *_pi = -1;
37: if (_pj) *_pj = -1;
38: if (_pk) *_pk = -1;
39: pi = pj = pk = -1;
40: if (_pi) {
41: for (n = 0; n < Mp; n++) {
42: if ((i >= start_i[n]) && (i < start_i[n] + span_i[n])) {
43: pi = n;
44: break;
45: }
46: }
48: *_pi = pi;
49: }
51: if (_pj) {
52: for (n = 0; n < Np; n++) {
53: if ((j >= start_j[n]) && (j < start_j[n] + span_j[n])) {
54: pj = n;
55: break;
56: }
57: }
59: *_pj = pj;
60: }
62: if (_pk) {
63: for (n = 0; n < Pp; n++) {
64: if ((k >= start_k[n]) && (k < start_k[n] + span_k[n])) {
65: pk = n;
66: break;
67: }
68: }
70: *_pk = pk;
71: }
73: switch (dim) {
74: case 1:
75: *rank_re = pi;
76: break;
77: case 2:
78: *rank_re = pi + pj * Mp;
79: break;
80: case 3:
81: *rank_re = pi + pj * Mp + pk * (Mp * Np);
82: break;
83: }
84: return 0;
85: }
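/*
  Compute the offset, in the repartitioned DMDA's rank-contiguous global ordering, of the
  first point owned by rank_re, by summing the point counts of all lower-ranked subdomains.
*/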
87: static PetscErrorCode _DMDADetermineGlobalS0(PetscInt dim, PetscMPIInt rank_re, PetscInt Mp_re, PetscInt Np_re, PetscInt Pp_re, PetscInt range_i_re[], PetscInt range_j_re[], PetscInt range_k_re[], PetscInt *s0)
88: {
89: PetscInt i, j, k, start_IJK = 0;
90: PetscInt rank_ijk;
92: switch (dim) {
93: case 1:
94: for (i = 0; i < Mp_re; i++) {
95: rank_ijk = i;
96: if (rank_ijk < rank_re) start_IJK += range_i_re[i];
97: }
98: break;
99: case 2:
100: for (j = 0; j < Np_re; j++) {
101: for (i = 0; i < Mp_re; i++) {
102: rank_ijk = i + j * Mp_re;
103: if (rank_ijk < rank_re) start_IJK += range_i_re[i] * range_j_re[j];
104: }
105: }
106: break;
107: case 3:
108: for (k = 0; k < Pp_re; k++) {
109: for (j = 0; j < Np_re; j++) {
110: for (i = 0; i < Mp_re; i++) {
111: rank_ijk = i + j * Mp_re + k * Mp_re * Np_re;
112: if (rank_ijk < rank_re) start_IJK += range_i_re[i] * range_j_re[j] * range_k_re[k];
113: }
114: }
115: }
116: break;
117: }
118: *s0 = start_IJK;
119: return 0;
120: }
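/*
  Scatter the coordinates of the parent DMDA onto the repartitioned DMDA (2D case): the
  parent coordinates are mapped to the natural ordering, each active rank gathers the
  entries matching its corners on the sub-DMDA, and the values are copied into the
  sub-DMDA's coordinate vector.
*/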
122: static PetscErrorCode PCTelescopeSetUp_dmda_repart_coors2d(PC_Telescope sred, DM dm, DM subdm)
123: {
124: DM cdm;
125: Vec coor, coor_natural, perm_coors;
126: PetscInt i, j, si, sj, ni, nj, M, N, Ml, Nl, c, nidx;
127: PetscInt *fine_indices;
128: IS is_fine, is_local;
129: VecScatter sctx;
131: DMGetCoordinates(dm, &coor);
132: if (!coor) return 0;
133: if (PCTelescope_isActiveRank(sred)) DMDASetUniformCoordinates(subdm, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
134: /* Get the coordinate vector from the distributed array */
135: DMGetCoordinateDM(dm, &cdm);
136: DMDACreateNaturalVector(cdm, &coor_natural);
138: DMDAGlobalToNaturalBegin(cdm, coor, INSERT_VALUES, coor_natural);
139: DMDAGlobalToNaturalEnd(cdm, coor, INSERT_VALUES, coor_natural);
141: /* get the natural-ordering indices of the coordinate entries needed by this rank's portion of the sub-DMDA */
142: DMDAGetInfo(dm, NULL, &M, &N, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
143: if (PCTelescope_isActiveRank(sred)) {
144: DMDAGetCorners(subdm, &si, &sj, NULL, &ni, &nj, NULL);
145: Ml = ni;
146: Nl = nj;
147: } else {
148: si = sj = 0;
149: ni = nj = 0;
150: Ml = Nl = 0;
151: }
153: PetscMalloc1(Ml * Nl * 2, &fine_indices);
154: c = 0;
155: if (PCTelescope_isActiveRank(sred)) {
156: for (j = sj; j < sj + nj; j++) {
157: for (i = si; i < si + ni; i++) {
158: nidx = (i) + (j)*M;
159: fine_indices[c] = 2 * nidx;
160: fine_indices[c + 1] = 2 * nidx + 1;
161: c = c + 2;
162: }
163: }
165: }
167: /* generate scatter */
168: ISCreateGeneral(PetscObjectComm((PetscObject)dm), Ml * Nl * 2, fine_indices, PETSC_USE_POINTER, &is_fine);
169: ISCreateStride(PETSC_COMM_SELF, Ml * Nl * 2, 0, 1, &is_local);
171: /* scatter */
172: VecCreate(PETSC_COMM_SELF, &perm_coors);
173: VecSetSizes(perm_coors, PETSC_DECIDE, Ml * Nl * 2);
174: VecSetType(perm_coors, VECSEQ);
176: VecScatterCreate(coor_natural, is_fine, perm_coors, is_local, &sctx);
177: VecScatterBegin(sctx, coor_natural, perm_coors, INSERT_VALUES, SCATTER_FORWARD);
178: VecScatterEnd(sctx, coor_natural, perm_coors, INSERT_VALUES, SCATTER_FORWARD);
179: /* access */
180: if (PCTelescope_isActiveRank(sred)) {
181: Vec _coors;
182: const PetscScalar *LA_perm;
183: PetscScalar *LA_coors;
185: DMGetCoordinates(subdm, &_coors);
186: VecGetArrayRead(perm_coors, &LA_perm);
187: VecGetArray(_coors, &LA_coors);
188: for (i = 0; i < Ml * Nl * 2; i++) LA_coors[i] = LA_perm[i];
189: VecRestoreArray(_coors, &LA_coors);
190: VecRestoreArrayRead(perm_coors, &LA_perm);
191: }
193: /* update local coords */
194: if (PCTelescope_isActiveRank(sred)) {
195: DM _dmc;
196: Vec _coors, _coors_local;
197: DMGetCoordinateDM(subdm, &_dmc);
198: DMGetCoordinates(subdm, &_coors);
199: DMGetCoordinatesLocal(subdm, &_coors_local);
200: DMGlobalToLocalBegin(_dmc, _coors, INSERT_VALUES, _coors_local);
201: DMGlobalToLocalEnd(_dmc, _coors, INSERT_VALUES, _coors_local);
202: }
203: VecScatterDestroy(&sctx);
204: ISDestroy(&is_fine);
205: PetscFree(fine_indices);
206: ISDestroy(&is_local);
207: VecDestroy(&perm_coors);
208: VecDestroy(&coor_natural);
209: return 0;
210: }
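/* 3D analogue of PCTelescopeSetUp_dmda_repart_coors2d(). */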
212: static PetscErrorCode PCTelescopeSetUp_dmda_repart_coors3d(PC_Telescope sred, DM dm, DM subdm)
213: {
214: DM cdm;
215: Vec coor, coor_natural, perm_coors;
216: PetscInt i, j, k, si, sj, sk, ni, nj, nk, M, N, P, Ml, Nl, Pl, c, nidx;
217: PetscInt *fine_indices;
218: IS is_fine, is_local;
219: VecScatter sctx;
221: DMGetCoordinates(dm, &coor);
222: if (!coor) return 0;
224: if (PCTelescope_isActiveRank(sred)) DMDASetUniformCoordinates(subdm, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
226: /* Get the coordinate vector from the distributed array */
227: DMGetCoordinateDM(dm, &cdm);
228: DMDACreateNaturalVector(cdm, &coor_natural);
229: DMDAGlobalToNaturalBegin(cdm, coor, INSERT_VALUES, coor_natural);
230: DMDAGlobalToNaturalEnd(cdm, coor, INSERT_VALUES, coor_natural);
232: /* get the natural-ordering indices of the coordinate entries needed by this rank's portion of the sub-DMDA */
233: DMDAGetInfo(dm, NULL, &M, &N, &P, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
235: if (PCTelescope_isActiveRank(sred)) {
236: DMDAGetCorners(subdm, &si, &sj, &sk, &ni, &nj, &nk);
237: Ml = ni;
238: Nl = nj;
239: Pl = nk;
240: } else {
241: si = sj = sk = 0;
242: ni = nj = nk = 0;
243: Ml = Nl = Pl = 0;
244: }
246: PetscMalloc1(Ml * Nl * Pl * 3, &fine_indices);
248: c = 0;
249: if (PCTelescope_isActiveRank(sred)) {
250: for (k = sk; k < sk + nk; k++) {
251: for (j = sj; j < sj + nj; j++) {
252: for (i = si; i < si + ni; i++) {
253: nidx = (i) + (j)*M + (k)*M * N;
254: fine_indices[c] = 3 * nidx;
255: fine_indices[c + 1] = 3 * nidx + 1;
256: fine_indices[c + 2] = 3 * nidx + 2;
257: c = c + 3;
258: }
259: }
260: }
261: }
263: /* generate scatter */
264: ISCreateGeneral(PetscObjectComm((PetscObject)dm), Ml * Nl * Pl * 3, fine_indices, PETSC_USE_POINTER, &is_fine);
265: ISCreateStride(PETSC_COMM_SELF, Ml * Nl * Pl * 3, 0, 1, &is_local);
267: /* scatter */
268: VecCreate(PETSC_COMM_SELF, &perm_coors);
269: VecSetSizes(perm_coors, PETSC_DECIDE, Ml * Nl * Pl * 3);
270: VecSetType(perm_coors, VECSEQ);
271: VecScatterCreate(coor_natural, is_fine, perm_coors, is_local, &sctx);
272: VecScatterBegin(sctx, coor_natural, perm_coors, INSERT_VALUES, SCATTER_FORWARD);
273: VecScatterEnd(sctx, coor_natural, perm_coors, INSERT_VALUES, SCATTER_FORWARD);
275: /* access */
276: if (PCTelescope_isActiveRank(sred)) {
277: Vec _coors;
278: const PetscScalar *LA_perm;
279: PetscScalar *LA_coors;
281: DMGetCoordinates(subdm, &_coors);
282: VecGetArrayRead(perm_coors, &LA_perm);
283: VecGetArray(_coors, &LA_coors);
284: for (i = 0; i < Ml * Nl * Pl * 3; i++) LA_coors[i] = LA_perm[i];
285: VecRestoreArray(_coors, &LA_coors);
286: VecRestoreArrayRead(perm_coors, &LA_perm);
287: }
289: /* update local coords */
290: if (PCTelescope_isActiveRank(sred)) {
291: DM _dmc;
292: Vec _coors, _coors_local;
294: DMGetCoordinateDM(subdm, &_dmc);
295: DMGetCoordinates(subdm, &_coors);
296: DMGetCoordinatesLocal(subdm, &_coors_local);
297: DMGlobalToLocalBegin(_dmc, _coors, INSERT_VALUES, _coors_local);
298: DMGlobalToLocalEnd(_dmc, _coors, INSERT_VALUES, _coors_local);
299: }
301: VecScatterDestroy(&sctx);
302: ISDestroy(&is_fine);
303: PetscFree(fine_indices);
304: ISDestroy(&is_local);
305: VecDestroy(&perm_coors);
306: VecDestroy(&coor_natural);
307: return 0;
308: }
310: static PetscErrorCode PCTelescopeSetUp_dmda_repart_coors(PC pc, PC_Telescope sred, PC_Telescope_DMDACtx *ctx)
311: {
312: PetscInt dim;
313: DM dm, subdm;
314: PetscSubcomm psubcomm;
315: MPI_Comm comm;
316: Vec coor;
318: PCGetDM(pc, &dm);
319: DMGetCoordinates(dm, &coor);
320: if (!coor) return 0;
321: psubcomm = sred->psubcomm;
322: comm = PetscSubcommParent(psubcomm);
323: subdm = ctx->dmrepart;
325: PetscInfo(pc, "PCTelescope: setting up the coordinates (DMDA)\n");
326: DMDAGetInfo(dm, &dim, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
327: switch (dim) {
328: case 1:
329: SETERRQ(comm, PETSC_ERR_SUP, "Telescope: DMDA (1D) repartitioning not provided");
330: case 2:
331: PCTelescopeSetUp_dmda_repart_coors2d(sred, dm, subdm);
332: break;
333: case 3:
334: PCTelescopeSetUp_dmda_repart_coors3d(sred, dm, subdm);
335: break;
336: }
337: return 0;
338: }
340: /* setup repartitioned dm */
341: PetscErrorCode PCTelescopeSetUp_dmda_repart(PC pc, PC_Telescope sred, PC_Telescope_DMDACtx *ctx)
342: {
343: DM dm;
344: PetscInt dim, nx, ny, nz, ndof, nsw, sum, k;
345: DMBoundaryType bx, by, bz;
346: DMDAStencilType stencil;
347: const PetscInt *_range_i_re;
348: const PetscInt *_range_j_re;
349: const PetscInt *_range_k_re;
350: DMDAInterpolationType itype;
351: PetscInt refine_x, refine_y, refine_z;
352: MPI_Comm comm, subcomm;
353: const char *prefix;
355: comm = PetscSubcommParent(sred->psubcomm);
356: subcomm = PetscSubcommChild(sred->psubcomm);
357: PCGetDM(pc, &dm);
359: DMDAGetInfo(dm, &dim, &nx, &ny, &nz, NULL, NULL, NULL, &ndof, &nsw, &bx, &by, &bz, &stencil);
360: DMDAGetInterpolationType(dm, &itype);
361: DMDAGetRefinementFactor(dm, &refine_x, &refine_y, &refine_z);
363: ctx->dmrepart = NULL;
364: _range_i_re = _range_j_re = _range_k_re = NULL;
365: /* Create DMDA on the child communicator */
366: if (PCTelescope_isActiveRank(sred)) {
367: switch (dim) {
368: case 1:
369: PetscInfo(pc, "PCTelescope: setting up the DMDA on comm subset (1D)\n");
370: /* DMDACreate1d(subcomm,bx,nx,ndof,nsw,NULL,&ctx->dmrepart); */
371: ny = nz = 1;
372: by = bz = DM_BOUNDARY_NONE;
373: break;
374: case 2:
375: PetscInfo(pc, "PCTelescope: setting up the DMDA on comm subset (2D)\n");
376: /* PetscCall(DMDACreate2d(subcomm,bx,by,stencil,nx,ny, PETSC_DECIDE,PETSC_DECIDE,
377: ndof,nsw, NULL,NULL,&ctx->dmrepart)); */
378: nz = 1;
379: bz = DM_BOUNDARY_NONE;
380: break;
381: case 3:
382: PetscInfo(pc, "PCTelescope: setting up the DMDA on comm subset (3D)\n");
383: /* PetscCall(DMDACreate3d(subcomm,bx,by,bz,stencil,nx,ny,nz,
384: PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE, ndof,nsw, NULL,NULL,NULL,&ctx->dmrepart)); */
385: break;
386: }
387: /*
388: The APIs DMDACreate1d(), DMDACreate2d(), and DMDACreate3d() do not allow us to set/append
389: a unique options prefix for the DM, hence we expose the contents of those APIs here.
390: This allows users to control the partitioning of the subDM.
391: */
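/*
   For example, assuming the standard DMDA options consumed by DMSetFromOptions(), the
   process grid of the repartitioned DMDA could be shaped with options of the form
     -<pc_prefix>telescope_repart_da_processors_x <Px>
   (and similarly for the y and z directions).
*/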
392: DMDACreate(subcomm, &ctx->dmrepart);
393: /* Set unique option prefix name */
394: KSPGetOptionsPrefix(sred->ksp, &prefix);
395: DMSetOptionsPrefix(ctx->dmrepart, prefix);
396: DMAppendOptionsPrefix(ctx->dmrepart, "repart_");
397: /* standard setup from DMDACreate{1,2,3}d() */
398: DMSetDimension(ctx->dmrepart, dim);
399: DMDASetSizes(ctx->dmrepart, nx, ny, nz);
400: DMDASetNumProcs(ctx->dmrepart, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE);
401: DMDASetBoundaryType(ctx->dmrepart, bx, by, bz);
402: DMDASetDof(ctx->dmrepart, ndof);
403: DMDASetStencilType(ctx->dmrepart, stencil);
404: DMDASetStencilWidth(ctx->dmrepart, nsw);
405: DMDASetOwnershipRanges(ctx->dmrepart, NULL, NULL, NULL);
406: DMSetFromOptions(ctx->dmrepart);
407: DMSetUp(ctx->dmrepart);
408: /* Set refinement factors and interpolation type from the parent */
409: DMDASetRefinementFactor(ctx->dmrepart, refine_x, refine_y, refine_z);
410: DMDASetInterpolationType(ctx->dmrepart, itype);
412: DMDAGetInfo(ctx->dmrepart, NULL, NULL, NULL, NULL, &ctx->Mp_re, &ctx->Np_re, &ctx->Pp_re, NULL, NULL, NULL, NULL, NULL, NULL);
413: DMDAGetOwnershipRanges(ctx->dmrepart, &_range_i_re, &_range_j_re, &_range_k_re);
415: ctx->dmrepart->ops->creatematrix = dm->ops->creatematrix;
416: ctx->dmrepart->ops->createdomaindecomposition = dm->ops->createdomaindecomposition;
417: }
419: /* generate ranges for repartitioned dm */
420: /* note - assume rank 0 always participates */
421: /* TODO: use a single MPI call */
422: MPI_Bcast(&ctx->Mp_re, 1, MPIU_INT, 0, comm);
423: MPI_Bcast(&ctx->Np_re, 1, MPIU_INT, 0, comm);
424: MPI_Bcast(&ctx->Pp_re, 1, MPIU_INT, 0, comm);
426: PetscCalloc3(ctx->Mp_re, &ctx->range_i_re, ctx->Np_re, &ctx->range_j_re, ctx->Pp_re, &ctx->range_k_re);
428: if (_range_i_re) PetscArraycpy(ctx->range_i_re, _range_i_re, ctx->Mp_re);
429: if (_range_j_re) PetscArraycpy(ctx->range_j_re, _range_j_re, ctx->Np_re);
430: if (_range_k_re) PetscArraycpy(ctx->range_k_re, _range_k_re, ctx->Pp_re);
432: /* TODO: use a single MPI call */
433: MPI_Bcast(ctx->range_i_re, ctx->Mp_re, MPIU_INT, 0, comm);
434: MPI_Bcast(ctx->range_j_re, ctx->Np_re, MPIU_INT, 0, comm);
435: MPI_Bcast(ctx->range_k_re, ctx->Pp_re, MPIU_INT, 0, comm);
437: PetscMalloc3(ctx->Mp_re, &ctx->start_i_re, ctx->Np_re, &ctx->start_j_re, ctx->Pp_re, &ctx->start_k_re);
439: sum = 0;
440: for (k = 0; k < ctx->Mp_re; k++) {
441: ctx->start_i_re[k] = sum;
442: sum += ctx->range_i_re[k];
443: }
445: sum = 0;
446: for (k = 0; k < ctx->Np_re; k++) {
447: ctx->start_j_re[k] = sum;
448: sum += ctx->range_j_re[k];
449: }
451: sum = 0;
452: for (k = 0; k < ctx->Pp_re; k++) {
453: ctx->start_k_re[k] = sum;
454: sum += ctx->range_k_re[k];
455: }
457: /* attach repartitioned dm to child ksp */
458: {
459: PetscErrorCode (*dmksp_func)(KSP, Mat, Mat, void *);
460: void *dmksp_ctx;
462: DMKSPGetComputeOperators(dm, &dmksp_func, &dmksp_ctx);
464: /* attach dm to ksp on sub communicator */
465: if (PCTelescope_isActiveRank(sred)) {
466: KSPSetDM(sred->ksp, ctx->dmrepart);
468: if (!dmksp_func || sred->ignore_kspcomputeoperators) {
469: KSPSetDMActive(sred->ksp, PETSC_FALSE);
470: } else {
471: /* sub ksp inherits dmksp_func and context provided by user */
472: KSPSetComputeOperators(sred->ksp, dmksp_func, dmksp_ctx);
473: KSPSetDMActive(sred->ksp, PETSC_TRUE);
474: }
475: }
476: }
477: return 0;
478: }
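/*
  Build the scalar permutation matrix relating the rank-contiguous global orderings of the
  parent and repartitioned DMDAs (3D case): row = index of a grid point in the parent
  ordering, column = index of the same point in the repartitioned ordering, value = 1.
  The scalar matrix is wrapped with MatCreateMAIJ() so that it acts on vectors with ndof
  degrees of freedom per grid point.
*/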
480: PetscErrorCode PCTelescopeSetUp_dmda_permutation_3d(PC pc, PC_Telescope sred, PC_Telescope_DMDACtx *ctx)
481: {
482: DM dm;
483: MPI_Comm comm;
484: Mat Pscalar, P;
485: PetscInt ndof;
486: PetscInt i, j, k, location, startI[3], endI[3], lenI[3], nx, ny, nz;
487: PetscInt sr, er, Mr;
488: Vec V;
490: PetscInfo(pc, "PCTelescope: setting up the permutation matrix (DMDA-3D)\n");
491: PetscObjectGetComm((PetscObject)pc, &comm);
493: PCGetDM(pc, &dm);
494: DMDAGetInfo(dm, NULL, &nx, &ny, &nz, NULL, NULL, NULL, &ndof, NULL, NULL, NULL, NULL, NULL);
496: DMGetGlobalVector(dm, &V);
497: VecGetSize(V, &Mr);
498: VecGetOwnershipRange(V, &sr, &er);
499: DMRestoreGlobalVector(dm, &V);
500: sr = sr / ndof;
501: er = er / ndof;
502: Mr = Mr / ndof;
504: MatCreate(comm, &Pscalar);
505: MatSetSizes(Pscalar, (er - sr), (er - sr), Mr, Mr);
506: MatSetType(Pscalar, MATAIJ);
507: MatSeqAIJSetPreallocation(Pscalar, 1, NULL);
508: MatMPIAIJSetPreallocation(Pscalar, 1, NULL, 1, NULL);
510: DMDAGetCorners(dm, NULL, NULL, NULL, &lenI[0], &lenI[1], &lenI[2]);
511: DMDAGetCorners(dm, &startI[0], &startI[1], &startI[2], &endI[0], &endI[1], &endI[2]);
512: endI[0] += startI[0];
513: endI[1] += startI[1];
514: endI[2] += startI[2];
516: for (k = startI[2]; k < endI[2]; k++) {
517: for (j = startI[1]; j < endI[1]; j++) {
518: for (i = startI[0]; i < endI[0]; i++) {
519: PetscMPIInt rank_ijk_re, rank_reI[3];
520: PetscInt s0_re;
521: PetscInt ii, jj, kk, local_ijk_re, mapped_ijk;
522: PetscInt lenI_re[3];
524: location = (i - startI[0]) + (j - startI[1]) * lenI[0] + (k - startI[2]) * lenI[0] * lenI[1];
525: _DMDADetermineRankFromGlobalIJK(3, i, j, k, ctx->Mp_re, ctx->Np_re, ctx->Pp_re, ctx->start_i_re, ctx->start_j_re, ctx->start_k_re, ctx->range_i_re, ctx->range_j_re, ctx->range_k_re, &rank_reI[0], &rank_reI[1], &rank_reI[2], &rank_ijk_re);
526: _DMDADetermineGlobalS0(3, rank_ijk_re, ctx->Mp_re, ctx->Np_re, ctx->Pp_re, ctx->range_i_re, ctx->range_j_re, ctx->range_k_re, &s0_re);
527: ii = i - ctx->start_i_re[rank_reI[0]];
529: jj = j - ctx->start_j_re[rank_reI[1]];
531: kk = k - ctx->start_k_re[rank_reI[2]];
533: lenI_re[0] = ctx->range_i_re[rank_reI[0]];
534: lenI_re[1] = ctx->range_j_re[rank_reI[1]];
535: lenI_re[2] = ctx->range_k_re[rank_reI[2]];
536: local_ijk_re = ii + jj * lenI_re[0] + kk * lenI_re[0] * lenI_re[1];
537: mapped_ijk = s0_re + local_ijk_re;
538: MatSetValue(Pscalar, sr + location, mapped_ijk, 1.0, INSERT_VALUES);
539: }
540: }
541: }
542: MatAssemblyBegin(Pscalar, MAT_FINAL_ASSEMBLY);
543: MatAssemblyEnd(Pscalar, MAT_FINAL_ASSEMBLY);
544: MatCreateMAIJ(Pscalar, ndof, &P);
545: MatDestroy(&Pscalar);
546: ctx->permutation = P;
547: return 0;
548: }
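/* 2D analogue of PCTelescopeSetUp_dmda_permutation_3d(). */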
550: PetscErrorCode PCTelescopeSetUp_dmda_permutation_2d(PC pc, PC_Telescope sred, PC_Telescope_DMDACtx *ctx)
551: {
552: DM dm;
553: MPI_Comm comm;
554: Mat Pscalar, P;
555: PetscInt ndof;
556: PetscInt i, j, location, startI[2], endI[2], lenI[2], nx, ny, nz;
557: PetscInt sr, er, Mr;
558: Vec V;
560: PetscInfo(pc, "PCTelescope: setting up the permutation matrix (DMDA-2D)\n");
561: PetscObjectGetComm((PetscObject)pc, &comm);
562: PCGetDM(pc, &dm);
563: DMDAGetInfo(dm, NULL, &nx, &ny, &nz, NULL, NULL, NULL, &ndof, NULL, NULL, NULL, NULL, NULL);
564: DMGetGlobalVector(dm, &V);
565: VecGetSize(V, &Mr);
566: VecGetOwnershipRange(V, &sr, &er);
567: DMRestoreGlobalVector(dm, &V);
568: sr = sr / ndof;
569: er = er / ndof;
570: Mr = Mr / ndof;
572: MatCreate(comm, &Pscalar);
573: MatSetSizes(Pscalar, (er - sr), (er - sr), Mr, Mr);
574: MatSetType(Pscalar, MATAIJ);
575: MatSeqAIJSetPreallocation(Pscalar, 1, NULL);
576: MatMPIAIJSetPreallocation(Pscalar, 1, NULL, 1, NULL);
578: DMDAGetCorners(dm, NULL, NULL, NULL, &lenI[0], &lenI[1], NULL);
579: DMDAGetCorners(dm, &startI[0], &startI[1], NULL, &endI[0], &endI[1], NULL);
580: endI[0] += startI[0];
581: endI[1] += startI[1];
583: for (j = startI[1]; j < endI[1]; j++) {
584: for (i = startI[0]; i < endI[0]; i++) {
585: PetscMPIInt rank_ijk_re, rank_reI[3];
586: PetscInt s0_re;
587: PetscInt ii, jj, local_ijk_re, mapped_ijk;
588: PetscInt lenI_re[3];
590: location = (i - startI[0]) + (j - startI[1]) * lenI[0];
591: _DMDADetermineRankFromGlobalIJK(2, i, j, 0, ctx->Mp_re, ctx->Np_re, ctx->Pp_re, ctx->start_i_re, ctx->start_j_re, ctx->start_k_re, ctx->range_i_re, ctx->range_j_re, ctx->range_k_re, &rank_reI[0], &rank_reI[1], NULL, &rank_ijk_re);
593: _DMDADetermineGlobalS0(2, rank_ijk_re, ctx->Mp_re, ctx->Np_re, ctx->Pp_re, ctx->range_i_re, ctx->range_j_re, ctx->range_k_re, &s0_re);
595: ii = i - ctx->start_i_re[rank_reI[0]];
597: jj = j - ctx->start_j_re[rank_reI[1]];
600: lenI_re[0] = ctx->range_i_re[rank_reI[0]];
601: lenI_re[1] = ctx->range_j_re[rank_reI[1]];
602: local_ijk_re = ii + jj * lenI_re[0];
603: mapped_ijk = s0_re + local_ijk_re;
604: MatSetValue(Pscalar, sr + location, mapped_ijk, 1.0, INSERT_VALUES);
605: }
606: }
607: MatAssemblyBegin(Pscalar, MAT_FINAL_ASSEMBLY);
608: MatAssemblyEnd(Pscalar, MAT_FINAL_ASSEMBLY);
609: MatCreateMAIJ(Pscalar, ndof, &P);
610: MatDestroy(&Pscalar);
611: ctx->permutation = P;
612: return 0;
613: }
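/*
  Create the work vectors and the VecScatter that moves entries of the permuted vector
  (repartitioned ordering, still distributed over the parent communicator) into the
  sequential buffer xtmp held by the ranks active on the sub-communicator. xred/yred are
  the right- and left-hand side work vectors for the sub-KSP.
*/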
615: PetscErrorCode PCTelescopeSetUp_dmda_scatters(PC pc, PC_Telescope sred, PC_Telescope_DMDACtx *ctx)
616: {
617: Vec xred, yred, xtmp, x, xp;
618: VecScatter scatter;
619: IS isin;
620: Mat B;
621: PetscInt m, bs, st, ed;
622: MPI_Comm comm;
624: PetscObjectGetComm((PetscObject)pc, &comm);
625: PCGetOperators(pc, NULL, &B);
626: MatCreateVecs(B, &x, NULL);
627: MatGetBlockSize(B, &bs);
628: VecDuplicate(x, &xp);
629: m = 0;
630: xred = NULL;
631: yred = NULL;
632: if (PCTelescope_isActiveRank(sred)) {
633: DMCreateGlobalVector(ctx->dmrepart, &xred);
634: VecDuplicate(xred, &yred);
635: VecGetOwnershipRange(xred, &st, &ed);
636: ISCreateStride(comm, ed - st, st, 1, &isin);
637: VecGetLocalSize(xred, &m);
638: } else {
639: VecGetOwnershipRange(x, &st, &ed);
640: ISCreateStride(comm, 0, st, 1, &isin);
641: }
642: ISSetBlockSize(isin, bs);
643: VecCreate(comm, &xtmp);
644: VecSetSizes(xtmp, m, PETSC_DECIDE);
645: VecSetBlockSize(xtmp, bs);
646: VecSetType(xtmp, ((PetscObject)x)->type_name);
647: VecScatterCreate(x, isin, xtmp, NULL, &scatter);
648: sred->xred = xred;
649: sred->yred = yred;
650: sred->isin = isin;
651: sred->scatter = scatter;
652: sred->xtmp = xtmp;
654: ctx->xp = xp;
655: VecDestroy(&x);
656: return 0;
657: }
659: PetscErrorCode PCTelescopeSetUp_dmda(PC pc, PC_Telescope sred)
660: {
661: PC_Telescope_DMDACtx *ctx;
662: PetscInt dim;
663: DM dm;
664: MPI_Comm comm;
666: PetscInfo(pc, "PCTelescope: setup (DMDA)\n");
667: PetscNew(&ctx);
668: sred->dm_ctx = (void *)ctx;
670: PetscObjectGetComm((PetscObject)pc, &comm);
671: PCGetDM(pc, &dm);
672: DMDAGetInfo(dm, &dim, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
674: PCTelescopeSetUp_dmda_repart(pc, sred, ctx);
675: PCTelescopeSetUp_dmda_repart_coors(pc, sred, ctx);
676: switch (dim) {
677: case 1:
678: SETERRQ(comm, PETSC_ERR_SUP, "Telescope: DMDA (1D) repartitioning not provided");
679: case 2:
680: PCTelescopeSetUp_dmda_permutation_2d(pc, sred, ctx);
681: break;
682: case 3:
683: PCTelescopeSetUp_dmda_permutation_3d(pc, sred, ctx);
684: break;
685: }
686: PCTelescopeSetUp_dmda_scatters(pc, sred, ctx);
687: return 0;
688: }
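/*
  Form the operator for the sub-KSP explicitly when no user callback is used: permute the
  parent operator with the permutation matrix (PtAP), extract the rows owned by the active
  ranks as sequential submatrices, and concatenate the pieces into a parallel matrix on the
  sub-communicator.
*/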
690: PetscErrorCode PCTelescopeMatCreate_dmda_dmactivefalse(PC pc, PC_Telescope sred, MatReuse reuse, Mat *A)
691: {
692: PC_Telescope_DMDACtx *ctx;
693: MPI_Comm comm, subcomm;
694: Mat Bperm, Bred, B, P;
695: PetscInt nr, nc;
696: IS isrow, iscol;
697: Mat Blocal, *_Blocal;
699: PetscInfo(pc, "PCTelescope: updating the redundant preconditioned operator (DMDA)\n");
700: PetscObjectGetComm((PetscObject)pc, &comm);
701: subcomm = PetscSubcommChild(sred->psubcomm);
702: ctx = (PC_Telescope_DMDACtx *)sred->dm_ctx;
704: PCGetOperators(pc, NULL, &B);
705: MatGetSize(B, &nr, &nc);
707: P = ctx->permutation;
708: MatPtAP(B, P, MAT_INITIAL_MATRIX, 1.1, &Bperm);
710: /* Get submatrices */
711: isrow = sred->isin;
712: ISCreateStride(comm, nc, 0, 1, &iscol);
714: MatCreateSubMatrices(Bperm, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, &_Blocal);
715: Blocal = *_Blocal;
716: Bred = NULL;
717: if (PCTelescope_isActiveRank(sred)) {
718: PetscInt mm;
720: if (reuse != MAT_INITIAL_MATRIX) Bred = *A;
721: MatGetSize(Blocal, &mm, NULL);
722: /* MatCreateMPIMatConcatenateSeqMat(subcomm,Blocal,PETSC_DECIDE,reuse,&Bred); */
723: MatCreateMPIMatConcatenateSeqMat(subcomm, Blocal, mm, reuse, &Bred);
724: }
725: *A = Bred;
727: ISDestroy(&iscol);
728: MatDestroy(&Bperm);
729: MatDestroyMatrices(1, &_Blocal);
730: return 0;
731: }
733: PetscErrorCode PCTelescopeMatCreate_dmda(PC pc, PC_Telescope sred, MatReuse reuse, Mat *A)
734: {
735: DM dm;
736: PetscErrorCode (*dmksp_func)(KSP, Mat, Mat, void *);
737: void *dmksp_ctx;
739: PCGetDM(pc, &dm);
740: DMKSPGetComputeOperators(dm, &dmksp_func, &dmksp_ctx);
741: /* We assume that dmksp_func == NULL is equivalent to dmActive = PETSC_FALSE */
742: if (dmksp_func && !sred->ignore_kspcomputeoperators) {
743: DM dmrepart;
744: Mat Ak;
746: *A = NULL;
747: if (PCTelescope_isActiveRank(sred)) {
748: KSPGetDM(sred->ksp, &dmrepart);
749: if (reuse == MAT_INITIAL_MATRIX) {
750: DMCreateMatrix(dmrepart, &Ak);
751: *A = Ak;
752: } else if (reuse == MAT_REUSE_MATRIX) {
753: Ak = *A;
754: }
755: /*
756: There is no need to explicitly assemble the operator now,
757: the sub-KSP will call the method provided to KSPSetComputeOperators() during KSPSetUp()
758: */
759: }
760: } else {
761: PCTelescopeMatCreate_dmda_dmactivefalse(pc, sred, reuse, A);
762: }
763: return 0;
764: }
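/*
  Map the vectors of a (near) nullspace attached to the parent operator onto the
  sub-communicator: each vector is permuted into the repartitioned ordering, scattered into
  xtmp, copied into a duplicate of xred, and the resulting vectors define the nullspace of
  the redundant operator.
*/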
766: PetscErrorCode PCTelescopeSubNullSpaceCreate_dmda_Telescope(PC pc, PC_Telescope sred, MatNullSpace nullspace, MatNullSpace *sub_nullspace)
767: {
768: PetscBool has_const;
769: PetscInt i, k, n = 0;
770: const Vec *vecs;
771: Vec *sub_vecs = NULL;
772: MPI_Comm subcomm;
773: PC_Telescope_DMDACtx *ctx;
775: ctx = (PC_Telescope_DMDACtx *)sred->dm_ctx;
776: subcomm = PetscSubcommChild(sred->psubcomm);
777: MatNullSpaceGetVecs(nullspace, &has_const, &n, &vecs);
779: if (PCTelescope_isActiveRank(sred)) {
780: /* create new vectors */
781: if (n) VecDuplicateVecs(sred->xred, n, &sub_vecs);
782: }
784: /* copy entries */
785: for (k = 0; k < n; k++) {
786: const PetscScalar *x_array;
787: PetscScalar *LA_sub_vec;
788: PetscInt st, ed;
790: /* permute vector into ordering associated with re-partitioned dmda */
791: MatMultTranspose(ctx->permutation, vecs[k], ctx->xp);
793: /* pull in vector x->xtmp */
794: VecScatterBegin(sred->scatter, ctx->xp, sred->xtmp, INSERT_VALUES, SCATTER_FORWARD);
795: VecScatterEnd(sred->scatter, ctx->xp, sred->xtmp, INSERT_VALUES, SCATTER_FORWARD);
797: /* copy vector entries into xred */
798: VecGetArrayRead(sred->xtmp, &x_array);
799: if (sub_vecs) {
800: if (sub_vecs[k]) {
801: VecGetOwnershipRange(sub_vecs[k], &st, &ed);
802: VecGetArray(sub_vecs[k], &LA_sub_vec);
803: for (i = 0; i < ed - st; i++) LA_sub_vec[i] = x_array[i];
804: VecRestoreArray(sub_vecs[k], &LA_sub_vec);
805: }
806: }
807: VecRestoreArrayRead(sred->xtmp, &x_array);
808: }
810: if (PCTelescope_isActiveRank(sred)) {
811: /* create new (near) nullspace for redundant object */
812: MatNullSpaceCreate(subcomm, has_const, n, sub_vecs, sub_nullspace);
813: VecDestroyVecs(n, &sub_vecs);
816: }
817: return 0;
818: }
820: PetscErrorCode PCTelescopeMatNullSpaceCreate_dmda(PC pc, PC_Telescope sred, Mat sub_mat)
821: {
822: Mat B;
824: PCGetOperators(pc, NULL, &B);
825: {
826: MatNullSpace nullspace, sub_nullspace;
827: MatGetNullSpace(B, &nullspace);
828: if (nullspace) {
829: PetscInfo(pc, "PCTelescope: generating nullspace (DMDA)\n");
830: PCTelescopeSubNullSpaceCreate_dmda_Telescope(pc, sred, nullspace, &sub_nullspace);
831: if (PCTelescope_isActiveRank(sred)) {
832: MatSetNullSpace(sub_mat, sub_nullspace);
833: MatNullSpaceDestroy(&sub_nullspace);
834: }
835: }
836: }
837: {
838: MatNullSpace nearnullspace, sub_nearnullspace;
839: MatGetNearNullSpace(B, &nearnullspace);
840: if (nearnullspace) {
841: PetscInfo(pc, "PCTelescope: generating near nullspace (DMDA)\n");
842: PCTelescopeSubNullSpaceCreate_dmda_Telescope(pc, sred, nearnullspace, &sub_nearnullspace);
843: if (PCTelescope_isActiveRank(sred)) {
844: MatSetNearNullSpace(sub_mat, sub_nearnullspace);
845: MatNullSpaceDestroy(&sub_nearnullspace);
846: }
847: }
848: }
849: return 0;
850: }
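/*
  Apply the preconditioner: permute x into the repartitioned ordering, scatter it to the
  active ranks, solve with the sub-KSP, then scatter the solution back and permute it into
  the ordering of the parent DMDA.
*/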
852: PetscErrorCode PCApply_Telescope_dmda(PC pc, Vec x, Vec y)
853: {
854: PC_Telescope sred = (PC_Telescope)pc->data;
855: Mat perm;
856: Vec xtmp, xp, xred, yred;
857: PetscInt i, st, ed;
858: VecScatter scatter;
859: PetscScalar *array;
860: const PetscScalar *x_array;
861: PC_Telescope_DMDACtx *ctx;
863: ctx = (PC_Telescope_DMDACtx *)sred->dm_ctx;
864: xtmp = sred->xtmp;
865: scatter = sred->scatter;
866: xred = sred->xred;
867: yred = sred->yred;
868: perm = ctx->permutation;
869: xp = ctx->xp;
871: PetscCitationsRegister(citation, &cited);
873: /* permute vector into ordering associated with re-partitioned dmda */
874: MatMultTranspose(perm, x, xp);
876: /* pull in vector x->xtmp */
877: VecScatterBegin(scatter, xp, xtmp, INSERT_VALUES, SCATTER_FORWARD);
878: VecScatterEnd(scatter, xp, xtmp, INSERT_VALUES, SCATTER_FORWARD);
880: /* copy vector entries into xred */
881: VecGetArrayRead(xtmp, &x_array);
882: if (xred) {
883: PetscScalar *LA_xred;
884: VecGetOwnershipRange(xred, &st, &ed);
886: VecGetArray(xred, &LA_xred);
887: for (i = 0; i < ed - st; i++) LA_xred[i] = x_array[i];
888: VecRestoreArray(xred, &LA_xred);
889: }
890: VecRestoreArrayRead(xtmp, &x_array);
892: /* solve */
893: if (PCTelescope_isActiveRank(sred)) {
894: KSPSolve(sred->ksp, xred, yred);
895: KSPCheckSolve(sred->ksp, pc, yred);
896: }
898: /* return vector */
899: VecGetArray(xtmp, &array);
900: if (yred) {
901: const PetscScalar *LA_yred;
902: VecGetOwnershipRange(yred, &st, &ed);
903: VecGetArrayRead(yred, &LA_yred);
904: for (i = 0; i < ed - st; i++) array[i] = LA_yred[i];
905: VecRestoreArrayRead(yred, &LA_yred);
906: }
907: VecRestoreArray(xtmp, &array);
908: VecScatterBegin(scatter, xtmp, xp, INSERT_VALUES, SCATTER_REVERSE);
909: VecScatterEnd(scatter, xtmp, xp, INSERT_VALUES, SCATTER_REVERSE);
910: MatMult(perm, xp, y);
911: return 0;
912: }
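/*
  Single Richardson sweep: for a nonzero initial guess, y is first scattered to the sub-KSP
  (and its initial-guess-nonzero flag set) before calling PCApply_Telescope_dmda(); the
  original flag value is restored afterwards.
*/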
914: PetscErrorCode PCApplyRichardson_Telescope_dmda(PC pc, Vec x, Vec y, Vec w, PetscReal rtol, PetscReal abstol, PetscReal dtol, PetscInt its, PetscBool zeroguess, PetscInt *outits, PCRichardsonConvergedReason *reason)
915: {
916: PC_Telescope sred = (PC_Telescope)pc->data;
917: Mat perm;
918: Vec xtmp, xp, yred;
919: PetscInt i, st, ed;
920: VecScatter scatter;
921: const PetscScalar *x_array;
922: PetscBool default_init_guess_value = PETSC_FALSE;
923: PC_Telescope_DMDACtx *ctx;
925: ctx = (PC_Telescope_DMDACtx *)sred->dm_ctx;
926: xtmp = sred->xtmp;
927: scatter = sred->scatter;
928: yred = sred->yred;
929: perm = ctx->permutation;
930: xp = ctx->xp;
933: *reason = (PCRichardsonConvergedReason)0;
935: if (!zeroguess) {
936: PetscInfo(pc, "PCTelescopeDMDA: Scattering y for non-zero initial guess\n");
937: /* permute vector into ordering associated with re-partitioned dmda */
938: MatMultTranspose(perm, y, xp);
940: /* pull in vector x->xtmp */
941: VecScatterBegin(scatter, xp, xtmp, INSERT_VALUES, SCATTER_FORWARD);
942: VecScatterEnd(scatter, xp, xtmp, INSERT_VALUES, SCATTER_FORWARD);
944: /* copy vector entries into xred */
945: VecGetArrayRead(xtmp, &x_array);
946: if (yred) {
947: PetscScalar *LA_yred;
948: VecGetOwnershipRange(yred, &st, &ed);
949: VecGetArray(yred, &LA_yred);
950: for (i = 0; i < ed - st; i++) LA_yred[i] = x_array[i];
951: VecRestoreArray(yred, &LA_yred);
952: }
953: VecRestoreArrayRead(xtmp, &x_array);
954: }
956: if (PCTelescope_isActiveRank(sred)) {
957: KSPGetInitialGuessNonzero(sred->ksp, &default_init_guess_value);
958: if (!zeroguess) KSPSetInitialGuessNonzero(sred->ksp, PETSC_TRUE);
959: }
961: PCApply_Telescope_dmda(pc, x, y);
963: if (PCTelescope_isActiveRank(sred)) KSPSetInitialGuessNonzero(sred->ksp, default_init_guess_value);
965: if (!*reason) *reason = PCRICHARDSON_CONVERGED_ITS;
966: *outits = 1;
967: return 0;
968: }
970: PetscErrorCode PCReset_Telescope_dmda(PC pc)
971: {
972: PC_Telescope sred = (PC_Telescope)pc->data;
973: PC_Telescope_DMDACtx *ctx;
975: ctx = (PC_Telescope_DMDACtx *)sred->dm_ctx;
976: VecDestroy(&ctx->xp);
977: MatDestroy(&ctx->permutation);
978: DMDestroy(&ctx->dmrepart);
979: PetscFree3(ctx->range_i_re, ctx->range_j_re, ctx->range_k_re);
980: PetscFree3(ctx->start_i_re, ctx->start_j_re, ctx->start_k_re);
981: return 0;
982: }
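/* Compact DMDA viewers: print the global sizes, process grid, dof and stencil width on one line. */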
984: PetscErrorCode DMView_DA_Short_3d(DM dm, PetscViewer v)
985: {
986: PetscInt M, N, P, m, n, p, ndof, nsw;
987: MPI_Comm comm;
988: PetscMPIInt size;
989: const char *prefix;
991: PetscObjectGetComm((PetscObject)dm, &comm);
992: MPI_Comm_size(comm, &size);
993: DMGetOptionsPrefix(dm, &prefix);
994: DMDAGetInfo(dm, NULL, &M, &N, &P, &m, &n, &p, &ndof, &nsw, NULL, NULL, NULL, NULL);
995: if (prefix) PetscViewerASCIIPrintf(v, "DMDA Object: (%s) %d MPI processes\n", prefix, size);
996: else PetscViewerASCIIPrintf(v, "DMDA Object: %d MPI processes\n", size);
997: PetscViewerASCIIPrintf(v, " M %" PetscInt_FMT " N %" PetscInt_FMT " P %" PetscInt_FMT " m %" PetscInt_FMT " n %" PetscInt_FMT " p %" PetscInt_FMT " dof %" PetscInt_FMT " overlap %" PetscInt_FMT "\n", M, N, P, m, n, p, ndof, nsw);
998: return 0;
999: }
1001: PetscErrorCode DMView_DA_Short_2d(DM dm, PetscViewer v)
1002: {
1003: PetscInt M, N, m, n, ndof, nsw;
1004: MPI_Comm comm;
1005: PetscMPIInt size;
1006: const char *prefix;
1008: PetscObjectGetComm((PetscObject)dm, &comm);
1009: MPI_Comm_size(comm, &size);
1010: DMGetOptionsPrefix(dm, &prefix);
1011: DMDAGetInfo(dm, NULL, &M, &N, NULL, &m, &n, NULL, &ndof, &nsw, NULL, NULL, NULL, NULL);
1012: if (prefix) PetscViewerASCIIPrintf(v, "DMDA Object: (%s) %d MPI processes\n", prefix, size);
1013: else PetscViewerASCIIPrintf(v, "DMDA Object: %d MPI processes\n", size);
1014: PetscViewerASCIIPrintf(v, " M %" PetscInt_FMT " N %" PetscInt_FMT " m %" PetscInt_FMT " n %" PetscInt_FMT " dof %" PetscInt_FMT " overlap %" PetscInt_FMT "\n", M, N, m, n, ndof, nsw);
1015: return 0;
1016: }
1018: PetscErrorCode DMView_DA_Short(DM dm, PetscViewer v)
1019: {
1020: PetscInt dim;
1022: DMDAGetInfo(dm, &dim, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
1023: switch (dim) {
1024: case 2:
1025: DMView_DA_Short_2d(dm, v);
1026: break;
1027: case 3:
1028: DMView_DA_Short_3d(dm, v);
1029: break;
1030: }
1031: return 0;
1032: }