Actual source code: ex9.c

  1: static char help[] = "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
  2:   2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
  3:   required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
  4:   To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";
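/* Example invocations (assuming an MPI launcher named mpiexec and the 7 ranks used by the test
   suite at the end of this file), e.g.,
       mpiexec -n 7 ./ex9 -world2sub
       mpiexec -n 7 ./ex9 -sub2sub -vectype cuda
   With at least 3 ranks, every subcommunicator created below is nonempty. */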

  6: #include <petscvec.h>
  7: int main(int argc, char **argv)
  8: {
  9:   PetscMPIInt nproc, grank, mycolor;
 10:   PetscInt    i, n, N = 20, low, high;
 11:   MPI_Comm    subcomm;
 12:   Vec         x  = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
 13:   Vec         yg = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
 14:   VecScatter  vscat;
 15:   IS          ix, iy;
 16:   PetscBool   iscuda = PETSC_FALSE; /* Option to use VECCUDA vectors */
 17:   PetscBool   optionflag, compareflag;
 18:   char        vectypename[PETSC_MAX_PATH_LEN];
 19:   PetscBool   world2sub  = PETSC_FALSE; /* Copy a vector from WORLD to a subcomm? */
 20:   PetscBool   sub2sub    = PETSC_FALSE; /* Copy a vector from a subcomm to another subcomm? */
 21:   PetscBool   world2subs = PETSC_FALSE; /* Copy a vector from WORLD to multiple subcomms? */

 24:   PetscInitialize(&argc, &argv, (char *)0, help);
 25:   MPI_Comm_size(PETSC_COMM_WORLD, &nproc);
 26:   MPI_Comm_rank(PETSC_COMM_WORLD, &grank);


 30:   PetscOptionsGetBool(NULL, NULL, "-world2sub", &world2sub, NULL);
 31:   PetscOptionsGetBool(NULL, NULL, "-sub2sub", &sub2sub, NULL);
 32:   PetscOptionsGetBool(NULL, NULL, "-world2subs", &world2subs, NULL);
 33:   PetscOptionsGetString(NULL, NULL, "-vectype", vectypename, sizeof(vectypename), &optionflag);
 34:   if (optionflag) {
 35:     PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag);
 36:     if (compareflag) iscuda = PETSC_TRUE;
 37:   }

 39:   /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
 40:   mycolor = grank % 3;
 41:   MPI_Comm_split(PETSC_COMM_WORLD, mycolor, grank, &subcomm);
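       /* For illustration: with the 7 ranks used by the tests at the end of this file, mycolor = grank % 3
          gives subcomm0 = {0, 3, 6}, subcomm1 = {1, 4} and subcomm2 = {2, 5} in PETSC_COMM_WORLD ranks. */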

 43:   /*===========================================================================
 44:    *  Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
 45:    *  a subcommunicator of PETSC_COMM_WORLD and vice versa.
 46:    *===========================================================================*/
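       /* Sketch of the approach used below: y lives only on subcomm0, so we wrap y's local memory in a
          vector yg defined on PETSC_COMM_WORLD (an alias, not a copy) and scatter between x and yg. Ranks
          outside subcomm0 still join the collective calls, but with zero-length contributions. */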
 47:   if (world2sub) {
 48:     VecCreate(PETSC_COMM_WORLD, &x);
 49:     VecSetSizes(x, PETSC_DECIDE, N);
 50:     if (iscuda) {
 51:       VecSetType(x, VECCUDA);
 52:     } else {
 53:       VecSetType(x, VECSTANDARD);
 54:     }
 55:     VecSetUp(x);
 56:     PetscObjectSetName((PetscObject)x, "x_commworld"); /* Give a name to view x clearly */

 58:     /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
 59:     VecGetOwnershipRange(x, &low, &high);
 60:     for (i = low; i < high; i++) {
 61:       PetscScalar val = -i;
 62:       VecSetValue(x, i, val, INSERT_VALUES);
 63:     }
 64:     VecAssemblyBegin(x);
 65:     VecAssemblyEnd(x);

 67:     /* Transfer x to a vector y only defined on subcomm0 and vice versa */
 68:     if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
 69:       Vec          y;
 70:       PetscScalar *yvalue;
 71:       VecCreate(subcomm, &y);
 72:       VecSetSizes(y, PETSC_DECIDE, N);
 73:       if (iscuda) {
 74:         VecSetType(y, VECCUDA);
 75:       } else {
 76:         VecSetType(y, VECSTANDARD);
 77:       }
 78:       VecSetUp(y);
 79:       PetscObjectSetName((PetscObject)y, "y_subcomm_0"); /* Give a name to view y clearly */
 80:       VecGetLocalSize(y, &n);
 81:       if (iscuda) {
 82: #if defined(PETSC_HAVE_CUDA)
 83:         VecCUDAGetArray(y, &yvalue);
 84: #endif
 85:       } else {
 86:         VecGetArray(y, &yvalue);
 87:       }
 88:       /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed to by yvalue.
 89:         Note this is a collective call. All processes have to call it and supply consistent N.
 90:       */
 91:       if (iscuda) {
 92: #if defined(PETSC_HAVE_CUDA)
 93:         VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, n, N, yvalue, &yg);
 94: #endif
 95:       } else {
 96:         VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, yvalue, &yg);
 97:       }
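           /* Note that yg is only a view of y's storage: VecCreateMPIWithArray() does not copy or take
              ownership of yvalue, so destroying yg later will not free y's memory. */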

 99:       /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */
100:       VecGetOwnershipRange(yg, &low, &high); /* low, high are global indices */
101:       ISCreateStride(PETSC_COMM_SELF, high - low, low, 1, &ix);
102:       ISDuplicate(ix, &iy);

104:       /* Union of ix's on subcomm0 covers the full range of [0,N) */
105:       VecScatterCreate(x, ix, yg, iy, &vscat);
106:       VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);
107:       VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);

109:       /* Once yg has received the data from x, we return yvalue to y so that y can be used in other operations.
110:         VecGetArray must be paired with VecRestoreArray.
111:       */
112:       if (iscuda) {
113: #if defined(PETSC_HAVE_CUDA)
114:         VecCUDARestoreArray(y, &yvalue);
115: #endif
116:       } else {
117:         VecRestoreArray(y, &yvalue);
118:       }

120:       /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
121:       VecView(y, PETSC_VIEWER_STDOUT_(subcomm));
122:       VecScale(y, 2.0);

124:       /* Send the new y back to x */
125:       VecGetArray(y, &yvalue); /* If VecScale was done on the GPU, PETSc will prepare a valid yvalue for access */
126:       /* Supply new yvalue to yg without memory copying */
127:       VecPlaceArray(yg, yvalue);
128:       VecScatterBegin(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
129:       VecScatterEnd(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
130:       VecResetArray(yg);
131:       VecRestoreArray(y, &yvalue); /* yvalue came from VecGetArray() above, so pair it with VecRestoreArray(), even for VECCUDA vectors */

139:       VecDestroy(&y);
140:     } else {
141:       /* Ranks outside of subcomm0 do not supply values to yg */
142:       if (iscuda) {
143: #if defined(PETSC_HAVE_CUDA)
144:         VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, 0 /*n*/, N, NULL, &yg);
145: #endif
146:       } else {
147:         VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, 0 /*n*/, N, NULL, &yg);
148:       }

150:       /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
151:         ranks just need to create empty ISes to cheat VecScatterCreate.
152:       */
153:       ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &ix);
154:       ISDuplicate(ix, &iy);

156:       VecScatterCreate(x, ix, yg, iy, &vscat);
157:       VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);
158:       VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);

160:       /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send.
161:         But they have to call VecScatterBegin/End since these routines are collective.
162:       */
163:       VecScatterBegin(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
164:       VecScatterEnd(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE);
165:     }

167:     VecView(x, PETSC_VIEWER_STDOUT_WORLD);
168:     ISDestroy(&ix);
169:     ISDestroy(&iy);
170:     VecDestroy(&x);
171:     VecDestroy(&yg);
172:     VecScatterDestroy(&vscat);
173:   } /* world2sub */

175:   /*===========================================================================
176:    *  Transfer a vector x defined on subcomm0 to a vector y defined on
177:    *  subcomm1. The two subcomms are not overlapping and their union is
178:    *  not necessarily equal to PETSC_COMM_WORLD.
179:    *===========================================================================*/
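       /* Outline of the approach used below: the two subcomms connect with MPI_Intercomm_create() through
          PETSC_COMM_WORLD, exchange the global size N, and merge into a common intracomm (parentcomm) with
          MPI_Intercomm_merge(). On parentcomm, xg aliases x's memory (owned by subcomm0) and yg aliases y's
          memory (owned by subcomm1), so a single VecScatter from xg to yg moves the data between subcomms. */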
180:   if (sub2sub) {
181:     if (mycolor == 0) {
182:       /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
183:       PetscInt           n, N = 22;
184:       Vec                x, xg, yg;
185:       IS                 ix, iy;
186:       VecScatter         vscat;
187:       const PetscScalar *xvalue;
188:       MPI_Comm           intercomm, parentcomm;
189:       PetscMPIInt        lrank;

191:       MPI_Comm_rank(subcomm, &lrank);
192:       /* x is on subcomm */
193:       VecCreate(subcomm, &x);
194:       VecSetSizes(x, PETSC_DECIDE, N);
195:       if (iscuda) {
196:         VecSetType(x, VECCUDA);
197:       } else {
198:         VecSetType(x, VECSTANDARD);
199:       }
200:       VecSetUp(x);
201:       VecGetOwnershipRange(x, &low, &high);

203:       /* initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
204:       for (i = low; i < high; i++) {
205:         PetscScalar val = i;
206:         VecSetValue(x, i, val, INSERT_VALUES);
207:       }
208:       VecAssemblyBegin(x);
209:       VecAssemblyEnd(x);

211:       MPI_Intercomm_create(subcomm, 0, PETSC_COMM_WORLD /*peer_comm*/, 1, 100 /*tag*/, &intercomm);
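           /* The 4th argument of MPI_Intercomm_create() is remote_leader: here it is rank 1 of the peer
              communicator PETSC_COMM_WORLD, i.e. the first rank of subcomm1. Subcomm1 symmetrically passes
              0 below, the PETSC_COMM_WORLD rank of subcomm0's leader. */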

213:       /* Tell rank 0 of subcomm1 the global size of x */
214:       if (!lrank) MPI_Send(&N, 1, MPIU_INT, 0 /*receiver's rank in remote comm, i.e., subcomm1*/, 200 /*tag*/, intercomm);

216:       /* Create an intracomm PETSc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
217:         But this order actually does not matter, since what we care about is the vector y, which is defined on subcomm1.
218:       */
219:       MPI_Intercomm_merge(intercomm, 0 /*low*/, &parentcomm);

221:       /* Create a vector xg on parentcomm, which shares memory with x */
222:       VecGetLocalSize(x, &n);
223:       if (iscuda) {
224: #if defined(PETSC_HAVE_CUDA)
225:         VecCUDAGetArrayRead(x, &xvalue);
226:         VecCreateMPICUDAWithArray(parentcomm, 1, n, N, xvalue, &xg);
227: #endif
228:       } else {
229:         VecGetArrayRead(x, &xvalue);
230:         VecCreateMPIWithArray(parentcomm, 1, n, N, xvalue, &xg);
231:       }

233:       /* Ranks in subcomm0 have nothing on yg, so they simply have n=0, array=NULL */
234:       if (iscuda) {
235: #if defined(PETSC_HAVE_CUDA)
236:         VecCreateMPICUDAWithArray(parentcomm, 1, 0 /*n*/, N, NULL /*array*/, &yg);
237: #endif
238:       } else {
239:         VecCreateMPIWithArray(parentcomm, 1, 0 /*n*/, N, NULL /*array*/, &yg);
240:       }

242:       /* Create the VecScatter, which does an identity map by setting yg[i] = xg[i], i=0..N-1. */
243:       VecGetOwnershipRange(xg, &low, &high); /* low, high are global indices of xg */
244:       ISCreateStride(PETSC_COMM_SELF, high - low, low, 1, &ix);
245:       ISDuplicate(ix, &iy);
246:       VecScatterCreate(xg, ix, yg, iy, &vscat);

248:       /* Scatter values from xg to yg */
249:       VecScatterBegin(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);
250:       VecScatterEnd(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);

252:       /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
253:       if (iscuda) {
254: #if defined(PETSC_HAVE_CUDA)
255:         VecCUDARestoreArrayRead(x, &xvalue);
256: #endif
257:       } else {
258:         VecRestoreArrayRead(x, &xvalue);
259:       }
260:       VecDestroy(&x);
261:       ISDestroy(&ix);
262:       ISDestroy(&iy);
263:       VecDestroy(&xg);
264:       VecDestroy(&yg);
265:       VecScatterDestroy(&vscat);
266:       MPI_Comm_free(&intercomm);
267:       MPI_Comm_free(&parentcomm);
268:     } else if (mycolor == 1) { /* subcomm1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
269:       PetscInt     n, N;
270:       Vec          y, xg, yg;
271:       IS           ix, iy;
272:       VecScatter   vscat;
273:       PetscScalar *yvalue;
274:       MPI_Comm     intercomm, parentcomm;
275:       PetscMPIInt  lrank;

277:       MPI_Comm_rank(subcomm, &lrank);
278:       MPI_Intercomm_create(subcomm, 0, PETSC_COMM_WORLD /*peer_comm*/, 0 /*remote_leader*/, 100 /*tag*/, &intercomm);

280:       /* Only the two rank-0 processes talk to each other: rank 0 of subcomm1 receives N from rank 0 of subcomm0 */
281:       if (!lrank) MPI_Recv(&N, 1, MPIU_INT, 0 /*sender's rank in remote comm, i.e. subcomm0*/, 200 /*tag*/, intercomm, MPI_STATUS_IGNORE);
282:       /* Rank 0 of subcomm1 bcasts N to its members */
283:       MPI_Bcast(&N, 1, MPIU_INT, 0 /*local root*/, subcomm);

285:       /* Create an intracomm PETSc can work on */
286:       MPI_Intercomm_merge(intercomm, 1 /*high*/, &parentcomm);

288:       /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL.*/
289:       if (iscuda) {
290: #if defined(PETSC_HAVE_CUDA)
291:         VecCreateMPICUDAWithArray(parentcomm, 1 /*bs*/, 0 /*n*/, N, NULL /*array*/, &xg);
292: #endif
293:       } else {
294:         VecCreateMPIWithArray(parentcomm, 1 /*bs*/, 0 /*n*/, N, NULL /*array*/, &xg);
295:       }

297:       VecCreate(subcomm, &y);
298:       VecSetSizes(y, PETSC_DECIDE, N);
299:       if (iscuda) {
300:         VecSetType(y, VECCUDA);
301:       } else {
302:         VecSetType(y, VECSTANDARD);
303:       }
304:       VecSetUp(y);

306:       PetscObjectSetName((PetscObject)y, "y_subcomm_1"); /* Give a name to view y clearly */
307:       VecGetLocalSize(y, &n);
308:       if (iscuda) {
309: #if defined(PETSC_HAVE_CUDA)
310:         VecCUDAGetArray(y, &yvalue);
311: #endif
312:       } else {
313:         VecGetArray(y, &yvalue);
314:       }
315:       /* Create a vector yg on parentcomm, which shares memory with y. Since these creation calls are
316:         collective on parentcomm, xg and yg must be created in the same order in subcomm0/1. For example,
317:         we cannot reverse the order of creating xg and yg in subcomm1.
318:       */
319:       if (iscuda) {
320: #if defined(PETSC_HAVE_CUDA)
321:         VecCreateMPICUDAWithArray(parentcomm, 1 /*bs*/, n, N, yvalue, &yg);
322: #endif
323:       } else {
324:         VecCreateMPIWithArray(parentcomm, 1 /*bs*/, n, N, yvalue, &yg);
325:       }

327:       /* Ranks in subcomm0 already specified the full range of the identity map.
328:         Ranks in subcomm1 just need to create empty ISes to cheat VecScatterCreate.
329:       */
330:       ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &ix);
331:       ISDuplicate(ix, &iy);
332:       VecScatterCreate(xg, ix, yg, iy, &vscat);

334:       /* Scatter values from xg to yg */
335:       VecScatterBegin(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);
336:       VecScatterEnd(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD);

338:       /* After the VecScatter is done, the values are available in yg. Since y is what we actually want, we return yvalue to y */
339:       if (iscuda) {
340: #if defined(PETSC_HAVE_CUDA)
341:         VecCUDARestoreArray(y, &yvalue);
342: #endif
343:       } else {
344:         VecRestoreArray(y, &yvalue);
345:       }

347:       /* Libraries on subcomm1 can safely use y now, for example, view it */
348:       VecView(y, PETSC_VIEWER_STDOUT_(subcomm));

350:       VecDestroy(&y);
351:       ISDestroy(&ix);
352:       ISDestroy(&iy);
353:       VecDestroy(&xg);
354:       VecDestroy(&yg);
355:       VecScatterDestroy(&vscat);
356:       MPI_Comm_free(&intercomm);
357:       MPI_Comm_free(&parentcomm);
358:     } else if (mycolor == 2) { /* subcomm2 */
359:       /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
360:     }
361:   } /* sub2sub */

363:   /*===========================================================================
364:    *  Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
365:    *  every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
366:    *  as we did in case 1, but that would not be efficient. Instead, we use a single
367:    *  VecScatter to achieve it.
368:    *===========================================================================*/
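       /* Sketch of the approach used below: each subcomm's y lends its local memory to a single vector yg
          defined on PETSC_COMM_WORLD, so one scatter from x to yg fills the y of every subcommunicator at once. */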
369:   if (world2subs) {
370:     Vec          y;
371:     PetscInt     n, N = 15, xstart, ystart, low, high;
372:     PetscScalar *yvalue;

374:     /* Initialize x to [0, 1, 2, 3, ..., N-1] */
375:     VecCreate(PETSC_COMM_WORLD, &x);
376:     VecSetSizes(x, PETSC_DECIDE, N);
377:     if (iscuda) {
378:       VecSetType(x, VECCUDA);
379:     } else {
380:       VecSetType(x, VECSTANDARD);
381:     }
382:     VecSetUp(x);
383:     VecGetOwnershipRange(x, &low, &high);
384:     for (i = low; i < high; i++) VecSetValue(x, i, (PetscScalar)i, INSERT_VALUES);
385:     VecAssemblyBegin(x);
386:     VecAssemblyEnd(x);

388:     /* Every subcomm has a y with the same global length as x */
389:     VecCreate(subcomm, &y);
390:     VecSetSizes(y, PETSC_DECIDE, N);
391:     if (iscuda) {
392:       VecSetType(y, VECCUDA);
393:     } else {
394:       VecSetType(y, VECSTANDARD);
395:     }
396:     VecSetUp(y);
397:     VecGetLocalSize(y, &n);

399:     /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
400:        Each rank in the subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
401:        necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of ranks
402:        0, 3, 6, etc. from PETSC_COMM_WORLD, so subcomm0's pieces are interleaved with pieces from other subcomms in yg.
403:     */
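         /* For instance, with 7 ranks and the 3 subcomms created above, yg has global size 3*N = 45 and its
            per-rank blocks appear in PETSC_COMM_WORLD rank order, i.e. alternating among pieces belonging to
            subcomm0, subcomm1 and subcomm2. */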
404:     if (iscuda) {
405: #if defined(PETSC_HAVE_CUDA)
406:       VecCUDAGetArray(y, &yvalue);
407:       VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, yvalue, &yg);
408: #endif
409:     } else {
410:       VecGetArray(y, &yvalue);
411:       VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, yvalue, &yg);
412:     }
413:     PetscObjectSetName((PetscObject)yg, "yg_on_subcomms"); /* Give a name to view yg clearly */

415:     /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y,
416:        since the first entry of y on this rank comes from x[xstart]. From ystart, we know where to put entries into yg.
417:      */
418:     VecGetOwnershipRange(y, &xstart, NULL);
419:     VecGetOwnershipRange(yg, &ystart, NULL);
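         /* E.g., a rank owning entries [xstart, xstart+n) of its y pulls the same index range of x (x and
            y have the same global size N here) and writes it into its own contiguous block of yg, which
            starts at global index ystart. */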

421:     ISCreateStride(PETSC_COMM_SELF, n, xstart, 1, &ix);
422:     ISCreateStride(PETSC_COMM_SELF, n, ystart, 1, &iy);
423:     VecScatterCreate(x, ix, yg, iy, &vscat);
424:     VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);
425:     VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD);

427:     /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in output. */
428:     VecView(yg, PETSC_VIEWER_STDOUT_WORLD);
429:     VecDestroy(&yg);

431:     /* Restore yvalue so that processes in the subcomm can use y from now on. */
432:     if (iscuda) {
433: #if defined(PETSC_HAVE_CUDA)
434:       VecCUDARestoreArray(y, &yvalue);
435: #endif
436:     } else {
437:       VecRestoreArray(y, &yvalue);
438:     }
439:     VecScale(y, 3.0);

441:     ISDestroy(&ix); /* One can also destroy ix, iy immediately after VecScatterCreate() */
442:     ISDestroy(&iy);
443:     VecDestroy(&x);
444:     VecDestroy(&y);
445:     VecScatterDestroy(&vscat);
446:   } /* world2subs */

448:   MPI_Comm_free(&subcomm);
449:   PetscFinalize();
450:   return 0;
451: }

453: /*TEST

455:    build:
456:      requires: !defined(PETSC_HAVE_MPIUNI)

458:    testset:
459:      nsize: 7

461:      test:
462:        suffix: 1
463:        args: -world2sub

465:      test:
466:        suffix: 2
467:        args: -sub2sub
468:        # deadlocks with NECMPI and INTELMPI (20210400300)
469:        requires: !defined(PETSC_HAVE_NECMPI) !defined(PETSC_HAVE_I_MPI_NUMVERSION)

471:      test:
472:        suffix: 3
473:        args: -world2subs

475:      test:
476:        suffix: 4
477:        args: -world2sub -vectype cuda
478:        requires: cuda

480:      test:
481:        suffix: 5
482:        args: -sub2sub -vectype cuda
483:        requires: cuda

485:      test:
486:        suffix: 6
487:        args: -world2subs -vectype cuda
488:        requires: cuda

490:      test:
491:        suffix: 7
492:        args: -world2sub -sf_type neighbor
493:        output_file: output/ex9_1.out
494:        # OpenMPI has a bug wrt MPI_Neighbor_alltoallv etc (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
495:        # segfaults with NECMPI
496:        requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_OMPI_MAJOR_VERSION) !defined(PETSC_HAVE_NECMPI)
497: TEST*/