Actual source code: ex9.c
static char help[] = "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";

#include <petscvec.h>
int main(int argc, char **argv)
{
  PetscMPIInt nproc, grank, mycolor;
  PetscInt    i, n, N = 20, low, high;
  MPI_Comm    subcomm;
  Vec         x  = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
  Vec         yg = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
  VecScatter  vscat;
  IS          ix, iy;
  PetscBool   iscuda = PETSC_FALSE; /* Option to use VECCUDA vectors */
  PetscBool   optionflag, compareflag;
  char        vectypename[PETSC_MAX_PATH_LEN];
  PetscBool   world2sub  = PETSC_FALSE; /* Copy a vector from WORLD to a subcomm? */
  PetscBool   sub2sub    = PETSC_FALSE; /* Copy a vector from a subcomm to another subcomm? */
  PetscBool   world2subs = PETSC_FALSE; /* Copy a vector from WORLD to multiple subcomms? */

  PetscInitialize(&argc, &argv, (char *)0, help);
  MPI_Comm_size(PETSC_COMM_WORLD, &nproc);
  MPI_Comm_rank(PETSC_COMM_WORLD, &grank);

  PetscOptionsGetBool(NULL, NULL, "-world2sub", &world2sub, NULL);
  PetscOptionsGetBool(NULL, NULL, "-sub2sub", &sub2sub, NULL);
  PetscOptionsGetBool(NULL, NULL, "-world2subs", &world2subs, NULL);
  PetscOptionsGetString(NULL, NULL, "-vectype", vectypename, sizeof(vectypename), &optionflag);
  if (optionflag) {
    PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag);
    if (compareflag) iscuda = PETSC_TRUE;
  }

  /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
  mycolor = grank % 3;
  MPI_Comm_split(PETSC_COMM_WORLD, mycolor, grank, &subcomm);
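  /* With MPI_Comm_split(), processes that passed the same color are now in the same subcomm, ordered by the
     key (their world rank grank). For example, when run with 7 ranks as in the tests below, subcomm0 = {0,3,6},
     subcomm1 = {1,4} and subcomm2 = {2,5}. */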

  /*===========================================================================
   * Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
   * a subcommunicator of PETSC_COMM_WORLD and vice versa.
   *===========================================================================*/
  if (world2sub) {
    VecCreate(PETSC_COMM_WORLD, &x);
    VecSetSizes(x, PETSC_DECIDE, N);
    if (iscuda) {
      VecSetType(x, VECCUDA);
    } else {
      VecSetType(x, VECSTANDARD);
    }
    VecSetUp(x);
    PetscObjectSetName((PetscObject)x, "x_commworld"); /* Give a name to view x clearly */

    /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
    VecGetOwnershipRange(x, &low, &high);
    for (i = low; i < high; i++) {
      PetscScalar val = -i;
      VecSetValue(x, i, val, INSERT_VALUES);
    }
    VecAssemblyBegin(x);
    VecAssemblyEnd(x);
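    /* VecSetValue() only caches the values; the VecAssemblyBegin/End() pair completes the insertion
       (communicating any off-process values) and must be called before x is used in other operations. */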

    /* Transfer x to a vector y only defined on subcomm0 and vice versa */
    if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
      Vec          y;
      PetscScalar *yvalue;
      VecCreate(subcomm, &y);
      VecSetSizes(y, PETSC_DECIDE, N);
      if (iscuda) {
        VecSetType(y, VECCUDA);
      } else {
        VecSetType(y, VECSTANDARD);
      }
      VecSetUp(y);
      PetscObjectSetName((PetscObject)y, "y_subcomm_0"); /* Give a name to view y clearly */
      VecGetLocalSize(y, &n);
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDAGetArray(y, &yvalue);
#endif
      } else {
        VecGetArray(y, &yvalue);
      }
      /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed to by yvalue.
         Note this is a collective call. All processes have to call it and supply a consistent N.
       */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, n, N, yvalue, &yg);
#endif
      } else {
        VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, yvalue, &yg);
      }
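      /* VecCreateMPIWithArray() (and its CUDA variant) does not copy or take ownership of the array:
         yg simply uses y's local storage, so whatever is scattered into yg below lands directly in y. */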

      /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */
      VecGetOwnershipRange(yg, &low, &high); /* low, high are global indices */
      ISCreateStride(PETSC_COMM_SELF, high - low, low, 1, &ix);
      ISDuplicate(ix, &iy);

      /* Union of ix's on subcomm0 covers the full range of [0,N) */
      VecScatterCreate(x, ix, yg, iy, &vscat);
      VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);
      VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);

      /* Once yg has received the data from x, we return yvalue to y so that we can use y in other operations.
         VecGetArray() must be paired with VecRestoreArray().
       */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDARestoreArray(y, &yvalue);
#endif
      } else {
        VecRestoreArray(y, &yvalue);
      }

      /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
      VecView(y, PETSC_VIEWER_STDOUT_(subcomm));
      VecScale(y, 2.0);

      /* Send the new y back to x */
      VecGetArray(y, &yvalue); /* If VecScale() is done on the GPU, PETSc will prepare a valid yvalue for access */
      /* Supply the new yvalue to yg without memory copying */
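      /* VecPlaceArray() swaps in a user-provided array without copying; it must be matched by the
         VecResetArray() below, which restores the array yg was created with. */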
      VecPlaceArray(yg, yvalue);
      VecScatterBegin(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
      VecScatterEnd(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
      VecResetArray(yg);
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDARestoreArray(y, &yvalue);
#endif
      } else {
        VecRestoreArray(y, &yvalue);
      }

      VecDestroy(&y);
    } else {
      /* Ranks outside of subcomm0 do not supply values to yg */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, 0 /*n*/, N, NULL, &yg);
#endif
      } else {
        VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, 0 /*n*/, N, NULL, &yg);
      }

      /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
         ranks just need to create empty ISes to satisfy VecScatterCreate().
       */
      ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &ix);
      ISDuplicate(ix, &iy);

      VecScatterCreate(x, ix, yg, iy, &vscat);
      VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);
      VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);

      /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send.
         But they have to call VecScatterBegin/End() since these routines are collective.
       */
      VecScatterBegin(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
      VecScatterEnd(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
    }

    VecView(x, PETSC_VIEWER_STDOUT_WORLD);
    ISDestroy(&ix);
    ISDestroy(&iy);
    VecDestroy(&x);
    VecDestroy(&yg);
    VecScatterDestroy(&vscat);
  } /* world2sub */

  /*===========================================================================
   * Transfer a vector x defined on subcomm0 to a vector y defined on
   * subcomm1. The two subcomms are not overlapping and their union is
   * not necessarily equal to PETSC_COMM_WORLD.
   *===========================================================================*/
  if (sub2sub) {
    if (mycolor == 0) {
      /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
      PetscInt           n, N = 22;
      Vec                x, xg, yg;
      IS                 ix, iy;
      VecScatter         vscat;
      const PetscScalar *xvalue;
      MPI_Comm           intercomm, parentcomm;
      PetscMPIInt        lrank;

      MPI_Comm_rank(subcomm, &lrank);
      /* x is on subcomm */
      VecCreate(subcomm, &x);
      VecSetSizes(x, PETSC_DECIDE, N);
      if (iscuda) {
        VecSetType(x, VECCUDA);
      } else {
        VecSetType(x, VECSTANDARD);
      }
      VecSetUp(x);
      VecGetOwnershipRange(x, &low, &high);

      /* Initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
      for (i = low; i < high; i++) {
        PetscScalar val = i;
        VecSetValue(x, i, val, INSERT_VALUES);
      }
      VecAssemblyBegin(x);
      VecAssemblyEnd(x);

      MPI_Intercomm_create(subcomm, 0, PETSC_COMM_WORLD /*peer_comm*/, 1, 100 /*tag*/, &intercomm);
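      /* Arguments: the local comm, the local leader's rank within it, the peer comm both leaders belong to,
         the remote leader's rank in the peer comm (world rank 1, i.e., rank 0 of subcomm1), and a tag that
         must match the one used by subcomm1's MPI_Intercomm_create() call. */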

      /* Tell rank 0 of subcomm1 the global size of x */
      if (!lrank) MPI_Send(&N, 1, MPIU_INT, 0 /*receiver's rank in remote comm, i.e., subcomm1*/, 200 /*tag*/, intercomm);

      /* Create an intracomm PETSc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
         But this order actually does not matter, since what we care about is the vector y, which is defined on subcomm1.
       */
      MPI_Intercomm_merge(intercomm, 0 /*low*/, &parentcomm);

      /* Create a vector xg on parentcomm, which shares memory with x */
      VecGetLocalSize(x, &n);
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDAGetArrayRead(x, &xvalue);
        VecCreateMPICUDAWithArray(parentcomm, 1, n, N, xvalue, &xg);
#endif
      } else {
        VecGetArrayRead(x, &xvalue);
        VecCreateMPIWithArray(parentcomm, 1, n, N, xvalue, &xg);
      }

      /* Ranks in subcomm0 have nothing on yg, so they simply have n=0, array=NULL */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCreateMPICUDAWithArray(parentcomm, 1, 0 /*n*/, N, NULL /*array*/, &yg);
#endif
      } else {
        VecCreateMPIWithArray(parentcomm, 1, 0 /*n*/, N, NULL /*array*/, &yg);
      }

      /* Create the vecscatter, which does the identity map by setting yg[i] = xg[i], i=0..N-1. */
      VecGetOwnershipRange(xg, &low, &high); /* low, high are global indices of xg */
      ISCreateStride(PETSC_COMM_SELF, high - low, low, 1, &ix);
      ISDuplicate(ix, &iy);
      VecScatterCreate(xg, ix, yg, iy, &vscat);

      /* Scatter values from xg to yg */
      VecScatterBegin(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);
      VecScatterEnd(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);

      /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDARestoreArrayRead(x, &xvalue);
#endif
      } else {
        VecRestoreArrayRead(x, &xvalue);
      }
      VecDestroy(&x);
      ISDestroy(&ix);
      ISDestroy(&iy);
      VecDestroy(&xg);
      VecDestroy(&yg);
      VecScatterDestroy(&vscat);
      MPI_Comm_free(&intercomm);
      MPI_Comm_free(&parentcomm);
    } else if (mycolor == 1) { /* subcomm1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
      PetscInt     n, N;
      Vec          y, xg, yg;
      IS           ix, iy;
      VecScatter   vscat;
      PetscScalar *yvalue;
      MPI_Comm     intercomm, parentcomm;
      PetscMPIInt  lrank;

      MPI_Comm_rank(subcomm, &lrank);
      MPI_Intercomm_create(subcomm, 0, PETSC_COMM_WORLD /*peer_comm*/, 0 /*remote_leader*/, 100 /*tag*/, &intercomm);
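      /* Here the remote leader is world rank 0, i.e., rank 0 of subcomm0, and the tag 100 matches the
         MPI_Intercomm_create() call made on subcomm0. */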

      /* The two rank-0 processes are talking */
      if (!lrank) MPI_Recv(&N, 1, MPIU_INT, 0 /*sender's rank in remote comm, i.e. subcomm0*/, 200 /*tag*/, intercomm, MPI_STATUS_IGNORE);
      /* Rank 0 of subcomm1 bcasts N to its members */
      MPI_Bcast(&N, 1, MPIU_INT, 0 /*local root*/, subcomm);

      /* Create an intracomm PETSc can work on */
      MPI_Intercomm_merge(intercomm, 1 /*high*/, &parentcomm);

      /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL. */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCreateMPICUDAWithArray(parentcomm, 1 /*bs*/, 0 /*n*/, N, NULL /*array*/, &xg);
#endif
      } else {
        VecCreateMPIWithArray(parentcomm, 1 /*bs*/, 0 /*n*/, N, NULL /*array*/, &xg);
      }

      VecCreate(subcomm, &y);
      VecSetSizes(y, PETSC_DECIDE, N);
      if (iscuda) {
        VecSetType(y, VECCUDA);
      } else {
        VecSetType(y, VECSTANDARD);
      }
      VecSetUp(y);

      PetscObjectSetName((PetscObject)y, "y_subcomm_1"); /* Give a name to view y clearly */
      VecGetLocalSize(y, &n);
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDAGetArray(y, &yvalue);
#endif
      } else {
        VecGetArray(y, &yvalue);
      }
      /* Create a vector yg on parentcomm, which shares memory with y. xg and yg must be
         created in the same order on subcomm0 and subcomm1, since these creation calls are collective
         on parentcomm. For example, we cannot reverse the order of creating xg and yg on subcomm1.
       */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCreateMPICUDAWithArray(parentcomm, 1 /*bs*/, n, N, yvalue, &yg);
#endif
      } else {
        VecCreateMPIWithArray(parentcomm, 1 /*bs*/, n, N, yvalue, &yg);
      }

      /* Ranks in subcomm0 already specified the full range of the identity map;
         ranks in subcomm1 just need to create empty ISes to satisfy VecScatterCreate().
       */
      ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &ix);
      ISDuplicate(ix, &iy);
      VecScatterCreate(xg, ix, yg, iy, &vscat);

      /* Scatter values from xg to yg */
      VecScatterBegin(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);
      VecScatterEnd(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);

      /* After the VecScatter is done, the values in yg are available. y is what we are interested in, so we return yvalue to y */
      if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
        VecCUDARestoreArray(y, &yvalue);
#endif
      } else {
        VecRestoreArray(y, &yvalue);
      }

      /* Libraries on subcomm1 can safely use y now, for example, view it */
      VecView(y, PETSC_VIEWER_STDOUT_(subcomm));

      VecDestroy(&y);
      ISDestroy(&ix);
      ISDestroy(&iy);
      VecDestroy(&xg);
      VecDestroy(&yg);
      VecScatterDestroy(&vscat);
      MPI_Comm_free(&intercomm);
      MPI_Comm_free(&parentcomm);
    } else if (mycolor == 2) { /* subcomm2 */
      /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
    }
  } /* sub2sub */

  /*===========================================================================
   * Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
   * every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
   * as we did in case 1, but that is not efficient. Instead, we use one vecscatter
   * to achieve that.
   *===========================================================================*/
  if (world2subs) {
    Vec          y;
    PetscInt     n, N = 15, xstart, ystart, low, high;
    PetscScalar *yvalue;

    /* Initialize x to [0, 1, 2, 3, ..., N-1] */
    VecCreate(PETSC_COMM_WORLD, &x);
    VecSetSizes(x, PETSC_DECIDE, N);
    if (iscuda) {
      VecSetType(x, VECCUDA);
    } else {
      VecSetType(x, VECSTANDARD);
    }
    VecSetUp(x);
    VecGetOwnershipRange(x, &low, &high);
    for (i = low; i < high; i++) VecSetValue(x, i, (PetscScalar)i, INSERT_VALUES);
    VecAssemblyBegin(x);
    VecAssemblyEnd(x);

    /* Every subcomm has a y as long as x */
    VecCreate(subcomm, &y);
    VecSetSizes(y, PETSC_DECIDE, N);
    if (iscuda) {
      VecSetType(y, VECCUDA);
    } else {
      VecSetType(y, VECSTANDARD);
    }
    VecSetUp(y);
    VecGetLocalSize(y, &n);

    /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
       Each rank in the subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
       necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of ranks
       0, 3, 6, etc. from PETSC_COMM_WORLD, so subcomm0's pieces are interleaved with pieces from other subcomms in yg.
     */
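    /* For example, with 7 ranks split into 3 subcomms, each subcomm owns a y of global length N = 15,
       so yg has 3*15 = 45 entries in total, laid out piece by piece in PETSC_COMM_WORLD rank order. */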
    if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
      VecCUDAGetArray(y, &yvalue);
      VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, yvalue, &yg);
#endif
    } else {
      VecGetArray(y, &yvalue);
      VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, yvalue, &yg);
    }
    PetscObjectSetName((PetscObject)yg, "yg_on_subcomms"); /* Give a name to view yg clearly */

    /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y,
       since the first entry of y on this rank is x[xstart]. From ystart, we know where to put entries into yg.
     */
    VecGetOwnershipRange(y, &xstart, NULL);
    VecGetOwnershipRange(yg, &ystart, NULL);
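    /* Each rank thus scatters x[xstart..xstart+n) into yg[ystart..ystart+n); taken over all ranks, this copies
       all of x into every subcomm's y with a single VecScatter. */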

    ISCreateStride(PETSC_COMM_SELF, n, xstart, 1, &ix);
    ISCreateStride(PETSC_COMM_SELF, n, ystart, 1, &iy);
    VecScatterCreate(x, ix, yg, iy, &vscat);
    VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);
    VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);

    /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in the output. */
    VecView(yg, PETSC_VIEWER_STDOUT_WORLD);
    VecDestroy(&yg);

    /* Restore yvalue so that processes in the subcomm can use y from now on. */
    if (iscuda) {
#if defined(PETSC_HAVE_CUDA)
      VecCUDARestoreArray(y, &yvalue);
#endif
    } else {
      VecRestoreArray(y, &yvalue);
    }
    VecScale(y, 3.0);

    ISDestroy(&ix); /* One can also destroy ix, iy immediately after VecScatterCreate() */
    ISDestroy(&iy);
    VecDestroy(&x);
    VecDestroy(&y);
    VecScatterDestroy(&vscat);
  } /* world2subs */

  MPI_Comm_free(&subcomm);
  PetscFinalize();
  return 0;
}

/*TEST

   build:
     requires: !defined(PETSC_HAVE_MPIUNI)

   testset:
     nsize: 7

     test:
       suffix: 1
       args: -world2sub

     test:
       suffix: 2
       args: -sub2sub
       # deadlocks with NECMPI and INTELMPI (20210400300)
       requires: !defined(PETSC_HAVE_NECMPI) !defined(PETSC_HAVE_I_MPI_NUMVERSION)

     test:
       suffix: 3
       args: -world2subs

     test:
       suffix: 4
       args: -world2sub -vectype cuda
       requires: cuda

     test:
       suffix: 5
       args: -sub2sub -vectype cuda
       requires: cuda

     test:
       suffix: 6
       args: -world2subs -vectype cuda
       requires: cuda

     test:
       suffix: 7
       args: -world2sub -sf_type neighbor
       output_file: output/ex9_1.out
       # OpenMPI has a bug wrt MPI_Neighbor_alltoallv etc. (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
       # segfaults with NECMPI
       requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_OMPI_MAJOR_VERSION) !defined(PETSC_HAVE_NECMPI)

TEST*/