Actual source code: sfalltoall.c
#include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
#include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
#include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>

/* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */
typedef PetscSF_Allgatherv PetscSF_Alltoall;
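
/* An SF of this type encodes the communication pattern of MPI_Alltoall on the SF's communicator:
   every process owns one root block and one leaf block per process, and the Bcast/Reduce kernels
   below map directly onto MPIU_Ialltoall() with a count of 1 of the given unit. */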

/*===================================================================================*/
/* Implementations of SF public APIs                                                 */
/*===================================================================================*/
static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
{
  PetscInt i;

  if (nroots) *nroots = sf->nroots;
  if (nleaves) *nleaves = sf->nleaves;
  if (ilocal) *ilocal = NULL; /* Contiguous local indices */
  if (iremote) {
    if (!sf->remote) {
      PetscMalloc1(sf->nleaves, &sf->remote);
      sf->remote_alloc = sf->remote;
      for (i = 0; i < sf->nleaves; i++) {
        sf->remote[i].rank  = i;
        sf->remote[i].index = i;
      }
    }
    *iremote = sf->remote;
  }
  return 0;
}
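
/* Bcast posts a single nonblocking all-to-all: root data is packed into the link's root buffer
   (staged on the host when GPU-aware MPI is not used), then MPIU_Ialltoall() ships one unit to each
   process. The matching wait/unpack is PetscSFBcastEnd_Basic, registered in PetscSFCreate_Alltoall()
   below. */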

static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink  link;
  MPI_Comm     comm;
  void        *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */
  MPI_Request *req;

  PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link);
  PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata);
  PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */);
  PetscObjectGetComm((PetscObject)sf, &comm);
  PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF);
  MPIU_Ialltoall(rootbuf, 1, unit, leafbuf, 1, unit, comm, req);
  return 0;
}
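
/* Reduce is the mirror image of Bcast: leaf data is packed and alltoall-ed back to the root owners.
   The MPI call itself ignores op; the reduction is applied when the received root buffer is unpacked
   in PetscSFReduceEnd_Basic. */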

static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink  link;
  MPI_Comm     comm;
  void        *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */
  MPI_Request *req;

  PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_REDUCE, &link);
  PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata);
  PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */);
  PetscObjectGetComm((PetscObject)sf, &comm);
  PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT);
  MPIU_Ialltoall(leafbuf, 1, unit, rootbuf, 1, unit, comm, req);
  return 0;
}
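
/* The local SF keeps the only edge of the all-to-all graph that stays on this process: leaf `rank`
   connected to root `rank`, i.e., the block a process "sends" to itself. It lives on PETSC_COMM_SELF
   and keeps the original, un-remapped indices. */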

static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf, PetscSF *out)
{
  PetscInt     nroots = 1, nleaves = 1, *ilocal;
  PetscSFNode *iremote = NULL;
  PetscSF      lsf;
  PetscMPIInt  rank;

  nroots  = 1;
  nleaves = 1;
  MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
  PetscMalloc1(nleaves, &ilocal);
  PetscMalloc1(nleaves, &iremote);
  ilocal[0]        = rank;
  iremote[0].rank  = 0;    /* rank in PETSC_COMM_SELF */
  iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */

  PetscSFCreate(PETSC_COMM_SELF, &lsf);
  PetscSFSetGraph(lsf, nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);
  PetscSFSetUp(lsf);
  *out = lsf;
  return 0;
}
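
/* Build an embedded SF that keeps only the selected roots. For the all-to-all graph a root index
   doubles as the rank of the single process holding the leaf connected to it, so after removing
   duplicates the selected root indices are used directly as destination ranks in a two-sided
   exchange. The result is assembled as a PETSCSFBASIC whose setup fields are filled by hand, so
   PetscSFSetUp() is never called on it. */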

static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
{
  PetscInt       i, *tmproots, *ilocal, ndranks, ndiranks;
  PetscSFNode   *iremote;
  PetscMPIInt    nroots, *roots, nleaves, *leaves, rank;
  MPI_Comm       comm;
  PetscSF_Basic *bas;
  PetscSF        esf;

  PetscObjectGetComm((PetscObject)sf, &comm);
  MPI_Comm_rank(comm, &rank);

  /* Uniq selected[] and store the result in roots[] */
  PetscMalloc1(nselected, &tmproots);
  PetscArraycpy(tmproots, selected, nselected);
  PetscSortRemoveDupsInt(&nselected, tmproots); /* nselected might be changed */

  nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */
  PetscMalloc1(nselected, &roots);
  for (i = 0; i < nselected; i++) roots[i] = tmproots[i];
  PetscFree(tmproots);

  /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */
  PetscCommBuildTwoSided(comm, 0 /*empty msg*/, MPI_INT /*fake*/, nroots, roots, NULL /*todata*/, &nleaves, &leaves, NULL /*fromdata*/);

  /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
  ndranks = 0;
  for (i = 0; i < nleaves; i++) {
    if (leaves[i] == rank) {
      leaves[i] = -rank;
      ndranks   = 1;
      break;
    }
  }
  PetscSortMPIInt(nleaves, leaves);
  if (nleaves && leaves[0] < 0) leaves[0] = rank;

  /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
  PetscMalloc1(nleaves, &ilocal);
  PetscMalloc1(nleaves, &iremote);
  for (i = 0; i < nleaves; i++) { /* 1:1 map from roots to leaves */
    ilocal[i]        = leaves[i];
    iremote[i].rank  = leaves[i];
    iremote[i].index = leaves[i];
  }
  PetscSFCreate(comm, &esf);
  PetscSFSetType(esf, PETSCSFBASIC); /* This optimized routine can only create a basic sf */
  PetscSFSetGraph(esf, sf->nleaves, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);

  /* As if we are calling PetscSFSetUpRanks(esf,self's group) */
  PetscMalloc4(nleaves, &esf->ranks, nleaves + 1, &esf->roffset, nleaves, &esf->rmine, nleaves, &esf->rremote);
  esf->nranks     = nleaves;
  esf->ndranks    = ndranks;
  esf->roffset[0] = 0;
  for (i = 0; i < nleaves; i++) {
    esf->ranks[i]       = leaves[i];
    esf->roffset[i + 1] = i + 1;
    esf->rmine[i]       = leaves[i];
    esf->rremote[i]     = leaves[i];
  }

  /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
  bas = (PetscSF_Basic *)esf->data;
  PetscMalloc2(nroots, &bas->iranks, nroots + 1, &bas->ioffset);
  PetscMalloc1(nroots, &bas->irootloc);
  /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
  ndiranks = 0;
  for (i = 0; i < nroots; i++) {
    if (roots[i] == rank) {
      roots[i] = -rank;
      ndiranks = 1;
      break;
    }
  }
  PetscSortMPIInt(nroots, roots);
  if (nroots && roots[0] < 0) roots[0] = rank;

  bas->niranks    = nroots;
  bas->ndiranks   = ndiranks;
  bas->ioffset[0] = 0;
  bas->itotal     = nroots;
  for (i = 0; i < nroots; i++) {
    bas->iranks[i]      = roots[i];
    bas->ioffset[i + 1] = i + 1;
    bas->irootloc[i]    = roots[i];
  }

  /* See PetscSFCreateEmbeddedRootSF_Basic */
  esf->nleafreqs  = esf->nranks - esf->ndranks;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Setup packing related fields */
  PetscSFSetUpPackFields(esf);

  PetscFree(roots);  /* values have been copied into bas->iranks/irootloc above */
  PetscFree(leaves); /* values have been copied into esf->ranks/rmine/rremote above */

  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  *newsf           = esf;
  return 0;
}
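
/* The type constructor provides only the Alltoall-specific routines above (graph query, Bcast/Reduce
   Begin, local and embedded SF creation); everything else is inherited from the Basic, Allgather(v)
   and Gatherv implementations, since every process moves equal-sized blocks and each root is
   connected to exactly one leaf. */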

PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
{
  PetscSF_Alltoall *dat = (PetscSF_Alltoall *)sf->data;

  sf->ops->BcastEnd  = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd = PetscSFReduceEnd_Basic;

  /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */
  sf->ops->Destroy       = PetscSFDestroy_Allgatherv;
  sf->ops->Reset         = PetscSFReset_Allgatherv;
  sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv;
  sf->ops->GetRootRanks  = PetscSFGetRootRanks_Allgatherv;

  /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */
  sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv;
  sf->ops->SetUp        = PetscSFSetUp_Allgather;

  /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv;

  /* Alltoall stuff */
  sf->ops->GetGraph             = PetscSFGetGraph_Alltoall;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Alltoall;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Alltoall;
  sf->ops->CreateLocalSF        = PetscSFCreateLocalSF_Alltoall;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall;

  PetscNew(&dat);
  sf->data = (void *)dat;
  return 0;
}
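
/* Usage sketch (illustrative only): an SF of this type is normally obtained by requesting the
   built-in all-to-all pattern rather than by calling PetscSFSetType() directly. Roughly, assuming a
   communicator `comm` of size `size`, with `rootdata` and `leafdata` pointing to arrays of `size`
   PetscScalar entries, and with error checking omitted to match the style of this file:

     PetscSF     sf;
     PetscLayout map;

     PetscLayoutCreate(comm, &map);
     PetscLayoutSetLocalSize(map, size); // one block per process; the layout mainly carries the comm here
     PetscLayoutSetUp(map);
     PetscSFCreate(comm, &sf);
     PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLTOALL); // selects this implementation
     PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE);
     PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE);
     PetscLayoutDestroy(&map);
     PetscSFDestroy(&sf);
*/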