Actual source code: mpits.c
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: PetscLogEvent PETSC_BuildTwoSided;
5: PetscLogEvent PETSC_BuildTwoSidedF;
7: const char *const PetscBuildTwoSidedTypes[] = {"ALLREDUCE", "IBARRIER", "REDSCATTER", "PetscBuildTwoSidedType", "PETSC_BUILDTWOSIDED_", NULL};
9: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
11: /*@
12: PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
14: Logically Collective
16: Input Parameters:
17: + comm - communicator (the setting is currently global, so this is typically `PETSC_COMM_WORLD`)
18: - twosided - algorithm to use in subsequent calls to `PetscCommBuildTwoSided()`
20: Level: developer
22: Note:
23: This option is currently global, but could be made per-communicator.
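Example Usage:
A minimal sketch (assuming `PetscInitialize()` has already been called); it forces the `MPI_Ibarrier`-based algorithm for all later rendezvous:
.vb
PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD, PETSC_BUILDTWOSIDED_IBARRIER);
.ve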
25: .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedGetType()`, `PetscBuildTwoSidedType`
26: @*/
27: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm, PetscBuildTwoSidedType twosided)
28: {
29: if (PetscDefined(USE_DEBUG)) { /* in debug builds, verify that every rank passed the same value */
30: PetscMPIInt b1[2], b2[2];
31: b1[0] = -(PetscMPIInt)twosided;
32: b1[1] = (PetscMPIInt)twosided;
33: MPIU_Allreduce(b1, b2, 2, MPI_INT, MPI_MAX, comm);
34: if (-b2[0] != b2[1]) SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Enum value must be same on all processes");
35: }
36: _twosided_type = twosided;
37: return 0;
38: }
40: /*@
41: PetscCommBuildTwoSidedGetType - get algorithm used when building two-sided communication
43: Logically Collective
45: Input Parameter:
46: . comm - communicator on which to query the algorithm
47: Output Parameter:
48: . twosided - algorithm that will be used by `PetscCommBuildTwoSided()`
49: Level: developer
51: .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedSetType()`, `PetscBuildTwoSidedType`
52: @*/
53: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm, PetscBuildTwoSidedType *twosided)
54: {
55: PetscMPIInt size;
57: *twosided = PETSC_BUILDTWOSIDED_NOTSET;
58: if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
59: MPI_Comm_size(comm, &size);
60: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
61: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
62: if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
63: #endif
64: PetscOptionsGetEnum(NULL, NULL, "-build_twosided", PetscBuildTwoSidedTypes, (PetscEnum *)&_twosided_type, NULL);
65: }
66: *twosided = _twosided_type;
67: return 0;
68: }
70: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
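/* Rendezvous via the NBX (nonblocking consensus) algorithm of Hoefler, Siebert and Lumsdaine (see the
reference in the PetscCommBuildTwoSided() manual page): post a synchronous send to every known target,
then loop probing for incoming messages; once all local sends have completed, start an MPI_Ibarrier and
finish when it completes, at which point every rendezvous message has been delivered. */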
71: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
72: {
73: PetscMPIInt nrecvs, tag, done, i;
74: MPI_Aint lb, unitbytes;
75: char *tdata;
76: MPI_Request *sendreqs, barrier;
77: PetscSegBuffer segrank, segdata;
78: PetscBool barrier_started;
80: PetscCommDuplicate(comm, &comm, &tag);
81: MPI_Type_get_extent(dtype, &lb, &unitbytes);
83: tdata = (char *)todata;
84: PetscMalloc1(nto, &sendreqs);
85: for (i = 0; i < nto; i++) MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i);
86: PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank);
87: PetscSegBufferCreate(unitbytes, 4 * count, &segdata);
89: nrecvs = 0;
90: barrier = MPI_REQUEST_NULL;
91: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
92: * but we need to work around it. */
93: barrier_started = PETSC_FALSE;
94: for (done = 0; !done;) {
95: PetscMPIInt flag;
96: MPI_Status status;
97: MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status);
98: if (flag) { /* incoming message */
99: PetscMPIInt *recvrank;
100: void *buf;
101: PetscSegBufferGet(segrank, 1, &recvrank);
102: PetscSegBufferGet(segdata, count, &buf);
103: *recvrank = status.MPI_SOURCE;
104: MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE);
105: nrecvs++;
106: }
107: if (!barrier_started) {
108: PetscMPIInt sent, nsends;
109: PetscMPIIntCast(nto, &nsends);
110: MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE);
111: if (sent) {
112: MPI_Ibarrier(comm, &barrier);
113: barrier_started = PETSC_TRUE;
114: PetscFree(sendreqs);
115: }
116: } else {
117: MPI_Test(&barrier, &done, MPI_STATUS_IGNORE);
118: }
119: }
120: *nfrom = nrecvs;
121: PetscSegBufferExtractAlloc(segrank, fromranks);
122: PetscSegBufferDestroy(&segrank);
123: PetscSegBufferExtractAlloc(segdata, fromdata);
124: PetscSegBufferDestroy(&segdata);
125: PetscCommDestroy(&comm);
126: return 0;
127: }
128: #endif
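/* Rendezvous via a dense flag vector: each rank marks the ranks it sends to, a summing allreduce then
tells every rank how many messages it will receive, and the payloads are exchanged with MPI_Irecv from
MPI_ANY_SOURCE matched by MPI_Isend. The flag vector makes the memory cost scale with the communicator size. */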
130: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
131: {
132: PetscMPIInt size, rank, *iflags, nrecvs, tag, *franks, i, flg;
133: MPI_Aint lb, unitbytes;
134: char *tdata, *fdata;
135: MPI_Request *reqs, *sendreqs;
136: MPI_Status *statuses;
137: PetscCommCounter *counter;
139: MPI_Comm_size(comm, &size);
140: MPI_Comm_rank(comm, &rank);
141: PetscCommDuplicate(comm, &comm, &tag);
142: MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg);
144: if (!counter->iflags) {
145: PetscCalloc1(size, &counter->iflags);
146: iflags = counter->iflags;
147: } else {
148: iflags = counter->iflags;
149: PetscArrayzero(iflags, size);
150: }
151: for (i = 0; i < nto; i++) iflags[toranks[i]] = 1;
152: MPIU_Allreduce(MPI_IN_PLACE, iflags, size, MPI_INT, MPI_SUM, comm);
153: nrecvs = iflags[rank];
154: MPI_Type_get_extent(dtype, &lb, &unitbytes);
156: PetscMalloc(nrecvs * count * unitbytes, &fdata);
157: tdata = (char *)todata;
158: PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses);
159: sendreqs = reqs + nrecvs;
160: for (i = 0; i < nrecvs; i++) MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i);
161: for (i = 0; i < nto; i++) MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i);
162: MPI_Waitall(nto + nrecvs, reqs, statuses);
163: PetscMalloc1(nrecvs, &franks);
164: for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
165: PetscFree2(reqs, statuses);
166: PetscCommDestroy(&comm);
168: *nfrom = nrecvs;
169: *fromranks = franks;
170: *(void **)fromdata = fdata;
171: return 0;
172: }
174: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
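/* Same flag-vector rendezvous as PetscCommBuildTwoSided_Allreduce(), but MPI_Reduce_scatter_block returns
each rank only its own receive count instead of the fully summed vector. */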
175: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
176: {
177: PetscMPIInt size, *iflags, nrecvs, tag, *franks, i, flg;
178: MPI_Aint lb, unitbytes;
179: char *tdata, *fdata;
180: MPI_Request *reqs, *sendreqs;
181: MPI_Status *statuses;
182: PetscCommCounter *counter;
184: MPI_Comm_size(comm, &size);
185: PetscCommDuplicate(comm, &comm, &tag);
186: MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg);
188: if (!counter->iflags) {
189: PetscCalloc1(size, &counter->iflags);
190: iflags = counter->iflags;
191: } else {
192: iflags = counter->iflags;
193: PetscArrayzero(iflags, size);
194: }
195: for (i = 0; i < nto; i++) iflags[toranks[i]] = 1;
196: MPI_Reduce_scatter_block(iflags, &nrecvs, 1, MPI_INT, MPI_SUM, comm);
197: MPI_Type_get_extent(dtype, &lb, &unitbytes);
199: PetscMalloc(nrecvs * count * unitbytes, &fdata);
200: tdata = (char *)todata;
201: PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses);
202: sendreqs = reqs + nrecvs;
203: for (i = 0; i < nrecvs; i++) MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i);
204: for (i = 0; i < nto; i++) MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i);
205: MPI_Waitall(nto + nrecvs, reqs, statuses);
206: PetscMalloc1(nrecvs, &franks);
207: for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
208: PetscFree2(reqs, statuses);
209: PetscCommDestroy(&comm);
211: *nfrom = nrecvs;
212: *fromranks = franks;
213: *(void **)fromdata = fdata;
214: return 0;
215: }
216: #endif
218: /*@C
219: PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
221: Collective
223: Input Parameters:
224: + comm - communicator
225: . count - number of entries to send/receive (must match on all ranks)
226: . dtype - datatype to send/receive from each rank (must match on all ranks)
227: . nto - number of ranks to send data to
228: . toranks - ranks to send to (array of length nto)
229: - todata - data to send to each rank (packed)
231: Output Parameters:
232: + nfrom - number of ranks from which messages are received
233: . fromranks - ranks from which messages are received (array of length nfrom; caller should `PetscFree()` it)
234: - fromdata - packed data received from each rank, count entries of type dtype per rank (nfrom*count entries in total; caller responsible for `PetscFree()`)
236: Level: developer
238: Options Database Key:
239: . -build_twosided <allreduce|ibarrier|redscatter> - algorithm used to set up two-sided communication. The default is allreduce for communicators with at most 1024 ranks and otherwise ibarrier (when the MPI implementation provides nonblocking collectives).
241: Notes:
242: This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
243: `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other constant-size data.
245: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
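Example Usage:
A minimal sketch in which every rank announces the length of a follow-up message to one target rank (`rank`, `size`, and `comm` are assumed to be provided by the surrounding code):
.vb
PetscMPIInt nto = 1, toranks[1], tolen[1] = {42};
PetscMPIInt nfrom, *fromranks, *fromlens;

toranks[0] = (rank + 1) % size;
PetscCommBuildTwoSided(comm, 1, MPI_INT, nto, toranks, tolen, &nfrom, &fromranks, &fromlens);
/* fromranks[i] announced the length fromlens[i]; both arrays have nfrom entries */
PetscFree(fromranks);
PetscFree(fromlens);
.ve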
247: References:
248: . * - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010.
249: The `MPI_Ibarrier`-based implementation uses the algorithm from this paper.
251: .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`, `PetscCommBuildTwoSidedSetType()`, `PetscCommBuildTwoSidedType`
252: @*/
253: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
254: {
255: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
257: PetscSysInitializePackage();
258: PetscLogEventSync(PETSC_BuildTwoSided, comm);
259: PetscLogEventBegin(PETSC_BuildTwoSided, 0, 0, 0, 0);
260: PetscCommBuildTwoSidedGetType(comm, &buildtype);
261: switch (buildtype) {
262: case PETSC_BUILDTWOSIDED_IBARRIER:
263: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
264: PetscCommBuildTwoSided_Ibarrier(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata);
265: break;
266: #else
267: SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
268: #endif
269: case PETSC_BUILDTWOSIDED_ALLREDUCE:
270: PetscCommBuildTwoSided_Allreduce(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata);
271: break;
272: case PETSC_BUILDTWOSIDED_REDSCATTER:
273: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
274: PetscCommBuildTwoSided_RedScatter(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata);
275: break;
276: #else
277: SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
278: #endif
279: default:
280: SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication");
281: }
282: PetscLogEventEnd(PETSC_BuildTwoSided, 0, 0, 0, 0);
283: return 0;
284: }
286: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
287: {
288: PetscMPIInt i, *tag;
289: MPI_Aint lb, unitbytes;
290: MPI_Request *sendreq, *recvreq;
292: PetscMalloc1(ntags, &tag);
293: if (ntags > 0) PetscCommDuplicate(comm, &comm, &tag[0]);
294: for (i = 1; i < ntags; i++) PetscCommGetNewTag(comm, &tag[i]);
296: /* Perform complete initial rendezvous */
297: PetscCommBuildTwoSided(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata);
299: PetscMalloc1(nto * ntags, &sendreq);
300: PetscMalloc1(*nfrom * ntags, &recvreq);
302: MPI_Type_get_extent(dtype, &lb, &unitbytes);
304: for (i = 0; i < nto; i++) {
305: PetscMPIInt k;
306: for (k = 0; k < ntags; k++) sendreq[i * ntags + k] = MPI_REQUEST_NULL;
307: (*send)(comm, tag, i, toranks[i], ((char *)todata) + count * unitbytes * i, sendreq + i * ntags, ctx);
308: }
309: for (i = 0; i < *nfrom; i++) {
310: void *header = (*(char **)fromdata) + count * unitbytes * i;
311: PetscMPIInt k;
312: for (k = 0; k < ntags; k++) recvreq[i * ntags + k] = MPI_REQUEST_NULL;
313: (*recv)(comm, tag, (*fromranks)[i], header, recvreq + i * ntags, ctx);
314: }
315: PetscFree(tag);
316: PetscCommDestroy(&comm);
317: *toreqs = sendreq;
318: *fromreqs = recvreq;
319: return 0;
320: }
322: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
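/* Same NBX loop as PetscCommBuildTwoSided_Ibarrier(), except that the user's send callback posts the
follow-up payload sends up front and the recv callback posts the matching receives as each rendezvous
message arrives; the resulting requests are returned for the caller to complete. */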
324: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
325: {
326: PetscMPIInt nrecvs, tag, *tags, done, i;
327: MPI_Aint lb, unitbytes;
328: char *tdata;
329: MPI_Request *sendreqs, *usendreqs, *req, barrier;
330: PetscSegBuffer segrank, segdata, segreq;
331: PetscBool barrier_started;
333: PetscCommDuplicate(comm, &comm, &tag);
334: PetscMalloc1(ntags, &tags);
335: for (i = 0; i < ntags; i++) PetscCommGetNewTag(comm, &tags[i]);
336: MPI_Type_get_extent(dtype, &lb, &unitbytes);
338: tdata = (char *)todata;
339: PetscMalloc1(nto, &sendreqs);
340: PetscMalloc1(nto * ntags, &usendreqs);
341: /* Post synchronous sends */
342: for (i = 0; i < nto; i++) MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i);
343: /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
344: * synchronous messages above. */
345: for (i = 0; i < nto; i++) {
346: PetscMPIInt k;
347: for (k = 0; k < ntags; k++) usendreqs[i * ntags + k] = MPI_REQUEST_NULL;
348: (*send)(comm, tags, i, toranks[i], tdata + count * unitbytes * i, usendreqs + i * ntags, ctx);
349: }
351: PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank);
352: PetscSegBufferCreate(unitbytes, 4 * count, &segdata);
353: PetscSegBufferCreate(sizeof(MPI_Request), 4, &segreq);
355: nrecvs = 0;
356: barrier = MPI_REQUEST_NULL;
357: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
358: * but we need to work around it. */
359: barrier_started = PETSC_FALSE;
360: for (done = 0; !done;) {
361: PetscMPIInt flag;
362: MPI_Status status;
363: MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status);
364: if (flag) { /* incoming message */
365: PetscMPIInt *recvrank, k;
366: void *buf;
367: PetscSegBufferGet(segrank, 1, &recvrank);
368: PetscSegBufferGet(segdata, count, &buf);
369: *recvrank = status.MPI_SOURCE;
370: MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE);
371: PetscSegBufferGet(segreq, ntags, &req);
372: for (k = 0; k < ntags; k++) req[k] = MPI_REQUEST_NULL;
373: (*recv)(comm, tags, status.MPI_SOURCE, buf, req, ctx);
374: nrecvs++;
375: }
376: if (!barrier_started) {
377: PetscMPIInt sent, nsends;
378: PetscMPIIntCast(nto, &nsends);
379: MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE);
380: if (sent) {
381: MPI_Ibarrier(comm, &barrier);
382: barrier_started = PETSC_TRUE;
383: }
384: } else {
385: MPI_Test(&barrier, &done, MPI_STATUS_IGNORE);
386: }
387: }
388: *nfrom = nrecvs;
389: PetscSegBufferExtractAlloc(segrank, fromranks);
390: PetscSegBufferDestroy(&segrank);
391: PetscSegBufferExtractAlloc(segdata, fromdata);
392: PetscSegBufferDestroy(&segdata);
393: *toreqs = usendreqs;
394: PetscSegBufferExtractAlloc(segreq, fromreqs);
395: PetscSegBufferDestroy(&segreq);
396: PetscFree(sendreqs);
397: PetscFree(tags);
398: PetscCommDestroy(&comm);
399: return 0;
400: }
401: #endif
403: /*@C
404: PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous
406: Collective
408: Input Parameters:
409: + comm - communicator
410: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
411: . dtype - datatype to send/receive from each rank (must match on all ranks)
412: . nto - number of ranks to send data to
413: . toranks - ranks to send to (array of length nto)
414: . todata - data to send to each rank (packed)
415: . ntags - number of tags needed by send/recv callbacks
416: . send - callback invoked on sending process when ready to send primary payload
417: . recv - callback invoked on receiving process after delivery of rendezvous message
418: - ctx - context for callbacks
420: Output Parameters:
421: + nfrom - number of ranks from which messages are received
422: . fromranks - ranks from which messages are received (array of length nfrom; caller should `PetscFree()` it)
423: - fromdata - packed data received from each rank, count entries of type dtype per rank (nfrom*count entries in total; caller responsible for `PetscFree()`)
425: Level: developer
427: Notes:
428: This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
429: `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data.
431: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
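Example Usage:
A sketch of send/recv callbacks (the names `MySend`, `MyRecv`, and the `Ctx` struct are hypothetical) in which the rendezvous message carries the length of a variable-sized `PetscScalar` payload that is then exchanged on the first user tag:
.vb
typedef struct {
  PetscScalar **tobufs;   /* payload to send to each entry of toranks */
  PetscScalar **frombufs; /* preallocated with room for one buffer per possible sender */
  PetscMPIInt   n;        /* number of receive buffers allocated so far */
} Ctx;

static PetscErrorCode MySend(MPI_Comm comm, const PetscMPIInt tags[], PetscMPIInt tonum, PetscMPIInt rank, void *header, MPI_Request req[], void *ctx)
{
  Ctx        *c   = (Ctx *)ctx;
  PetscMPIInt len = *(PetscMPIInt *)header; /* the length announced in the rendezvous message */

  MPI_Isend(c->tobufs[tonum], len, MPIU_SCALAR, rank, tags[0], comm, &req[0]);
  return 0;
}

static PetscErrorCode MyRecv(MPI_Comm comm, const PetscMPIInt tags[], PetscMPIInt rank, void *header, MPI_Request req[], void *ctx)
{
  Ctx        *c   = (Ctx *)ctx;
  PetscMPIInt len = *(PetscMPIInt *)header; /* the length the sender announced */

  PetscMalloc1(len, &c->frombufs[c->n]);
  MPI_Irecv(c->frombufs[c->n], len, MPIU_SCALAR, rank, tags[0], comm, &req[0]);
  c->n++;
  return 0;
}
.ve
The rendezvous itself would then be a call such as `PetscCommBuildTwoSidedF(comm, 1, MPI_INT, nto, toranks, tolens, &nfrom, &fromranks, &fromlens, 1, MySend, MyRecv, &ctx);`, where `tolens` holds the payload length for each target.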
433: References:
434: . * - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010.
435: The `MPI_Ibarrier`-based implementation uses the algorithm from this paper.
437: .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedFReq()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`
438: @*/
439: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
440: {
441: MPI_Request *toreqs, *fromreqs;
443: PetscCommBuildTwoSidedFReq(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, &toreqs, &fromreqs, send, recv, ctx);
444: MPI_Waitall(nto * ntags, toreqs, MPI_STATUSES_IGNORE);
445: MPI_Waitall(*nfrom * ntags, fromreqs, MPI_STATUSES_IGNORE);
446: PetscFree(toreqs);
447: PetscFree(fromreqs);
448: return 0;
449: }
451: /*@C
452: PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests
454: Collective
456: Input Parameters:
457: + comm - communicator
458: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
459: . dtype - datatype to send/receive from each rank (must match on all ranks)
460: . nto - number of ranks to send data to
461: . toranks - ranks to send to (array of length nto)
462: . todata - data to send to each rank (packed)
463: . ntags - number of tags needed by send/recv callbacks
464: . send - callback invoked on sending process when ready to send primary payload
465: . recv - callback invoked on receiving process after delivery of rendezvous message
466: - ctx - context for callbacks
468: Output Parameters:
469: + nfrom - number of ranks from which messages are received
470: . fromranks - ranks from which messages are received (array of length nfrom; caller should `PetscFree()` it)
471: . fromdata - packed data received from each rank, count entries of type dtype per rank (nfrom*count entries in total; caller responsible for `PetscFree()`)
472: . toreqs - array of nto*ntags sender requests (caller must wait on these, then `PetscFree()`)
473: - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then `PetscFree()`)
475: Level: developer
477: Notes:
478: This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
479: `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data.
481: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
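Example Usage:
A minimal sketch (reusing the hypothetical `MySend`/`MyRecv` callbacks and context sketched in the `PetscCommBuildTwoSidedF()` manual page, with ntags = 1) that overlaps unrelated work with the payload exchange:
.vb
MPI_Request *toreqs, *fromreqs;

PetscCommBuildTwoSidedFReq(comm, 1, MPI_INT, nto, toranks, tolens, &nfrom, &fromranks, &fromlens, 1, &toreqs, &fromreqs, MySend, MyRecv, &ctx);
/* ... unrelated work here ... */
MPI_Waitall(nto, toreqs, MPI_STATUSES_IGNORE);     /* nto*ntags requests with ntags = 1 */
MPI_Waitall(nfrom, fromreqs, MPI_STATUSES_IGNORE); /* nfrom*ntags requests */
PetscFree(toreqs);
PetscFree(fromreqs);
.ve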
483: References:
484: . * - Hoefler, Siebert and Lumsdaine, "Scalable communication protocols for dynamic sparse data exchange", 2010.
485: The `MPI_Ibarrier`-based implementation uses the algorithm from this paper.
487: .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedF()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`
488: @*/
489: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx)
490: {
491: PetscErrorCode (*f)(MPI_Comm, PetscMPIInt, MPI_Datatype, PetscMPIInt, const PetscMPIInt[], const void *, PetscMPIInt *, PetscMPIInt **, void *, PetscMPIInt, MPI_Request **, MPI_Request **, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx);
492: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
493: PetscMPIInt i, size;
495: PetscSysInitializePackage();
496: MPI_Comm_size(comm, &size);
498: PetscLogEventSync(PETSC_BuildTwoSidedF, comm);
499: PetscLogEventBegin(PETSC_BuildTwoSidedF, 0, 0, 0, 0);
500: PetscCommBuildTwoSidedGetType(comm, &buildtype);
501: switch (buildtype) {
502: case PETSC_BUILDTWOSIDED_IBARRIER:
503: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
504: f = PetscCommBuildTwoSidedFReq_Ibarrier;
505: break;
506: #else
507: SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
508: #endif
509: case PETSC_BUILDTWOSIDED_ALLREDUCE:
510: case PETSC_BUILDTWOSIDED_REDSCATTER:
511: f = PetscCommBuildTwoSidedFReq_Reference;
512: break;
513: default:
514: SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication");
515: }
516: (*f)(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, toreqs, fromreqs, send, recv, ctx);
517: PetscLogEventEnd(PETSC_BuildTwoSidedF, 0, 0, 0, 0);
518: return 0;
519: }