Actual source code: psort.c


#include <petsc/private/petscimpl.h>
#include <petscis.h>

/* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt split, mid, partner;

  diff = rankEnd - rankStart;
  if (diff <= 0) return 0;
  if (diff == 1) {
    if (forward) {
      PetscSortInt((PetscInt)n, keys);
    } else {
      PetscSortReverseInt((PetscInt)n, keys);
    }
    return 0;
  }
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE);
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
    } else {
      for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward);
  } else {
    PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);
  }
  return 0;
}
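/* Illustration (example values only, not part of the algorithm) of the compare-split in
 * PetscParallelSortInt_Bitonic_Merge() above: with n = 3 keys per rank and an ascending
 * (forward) merge, suppose the lower rank of a partner pair holds {1, 4, 9} and the higher
 * rank holds {2, 3, 7}.  After the MPI_Sendrecv(), the element-wise min/max step leaves
 * {1, 3, 7} on the lower rank and {2, 4, 9} on the higher rank, so position by position the
 * lower rank's key is <= the higher rank's key; the recursion finishes with a local sort on
 * each rank once diff == 1. */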

/* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt mid;

  diff = rankEnd - rankStart;
  if (diff <= 0) return 0;
  if (diff == 1) {
    if (forward) {
      PetscSortInt(n, keys);
    } else {
      PetscSortReverseInt(n, keys);
    }
    return 0;
  }
  mid = rankStart + diff / 2;
  /* divide and conquer */
  if (rank < mid) {
    PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward);
  } else {
    PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);
  }
  /* bitonic merge */
  PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward);
  return 0;
}
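/* Note on the recursion above: the first half of the ranks is sorted opposite to the
 * requested direction and the second half in the requested direction, which is the standard
 * bitonic pattern; the concatenation is then handed to PetscParallelSortInt_Bitonic_Merge()
 * to be merged into a single sorted sequence in the requested direction. */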

static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt size, rank, tag, mpin;
  PetscInt   *buffer;

  PetscCommGetNewTag(comm, &tag);
  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);
  PetscMPIIntCast(n, &mpin);
  PetscMalloc1(n, &buffer);
  PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE);
  PetscFree(buffer);
  return 0;
}

static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots, *finalpivots, i;
  PetscInt     non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  MPI_Comm_size(mapin->comm, &size);
  MPI_Comm_rank(mapin->comm, &rank);

  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscMalloc1(size - 1, &pivots);
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list.  each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index     = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N);
      index     = PetscMin(index, (mapin->n - 1));
      index     = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots);
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted);
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort of pivot candidates failed");
  }

  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
  non_empty = size;
  for (i = 0; i < size; i++)
    if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
  PetscCalloc1(size + 1, &keys_per);
  my_first = -1;
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      size_t sample      = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1));
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscMalloc1(size * max_keys_per, &finalpivots);
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm);
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j];
    }
  }
  *outpivots = finalpivots;
  PetscFree(keys_per);
  PetscFree(pivots);
  return 0;
}
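/* Worked illustration (example values only) of the selection logic above: with P = 4 ranks,
 * one of which is empty in both layouts, Z = 3.  The parallel sort leaves P * (P - 1) = 12
 * pivot slots, of which (P - 1) * Z = 9 are real (the PETSC_MAX_INT entries sort to the end).
 * The selected global indices are Z - 1, 2*Z - 1, 3*Z - 1 = 2, 5, 8; each falls at local
 * index 2 of ranks 0, 1 and 2, so keys_per = {1, 1, 1, 0}, and after the allgather and
 * compaction every rank holds the same P - 1 = 3 final pivots. */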

static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt  size, rank;
  PetscInt     myOffset, nextOffset;
  PetscInt     i;
  PetscMPIInt  total, filled;
  PetscMPIInt  sender, nfirst, nsecond;
  PetscMPIInt  firsttag, secondtag;
  MPI_Request  firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status   firststatus;

  MPI_Comm_size(map->comm, &size);
  MPI_Comm_rank(map->comm, &rank);
  PetscCommGetNewTag(map->comm, &firsttag);
  PetscCommGetNewTag(map->comm, &secondtag);
  myOffset = 0;
  PetscMalloc2(size, &firstreqs, size, &secondreqs);
  MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm);
  myOffset = nextOffset - n;
  total    = map->range[rank + 1] - map->range[rank];
  if (total > 0) MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv);
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    oStart  = PetscMax(myOffset, map->range[i]);
    oEnd    = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++]));
    } else if (overlap > 0) {
      /* send second message */
      MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    MPI_Wait(&firstreqrcv, &firststatus);
    sender = firststatus.MPI_SOURCE;
    MPI_Get_count(&firststatus, MPIU_INT, &filled);
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status  stat;

    sender++;
    MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat);
    MPI_Get_count(&stat, MPIU_INT, &mfilled);
    filled += mfilled;
  }
  MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE);
  MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE);
  PetscFree2(firstreqs, secondreqs);
  return 0;
}
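/* Illustration (example values only) of the two-phase exchange above: say the MPI_Scan()
 * leaves rank 0 holding global entries [0, 5) and rank 1 holding [5, 12), while the target
 * layout map assigns [0, 8) to rank 0 and [8, 12) to rank 1.  Rank 0 sends entries [0, 5) to
 * rank 0 with firsttag (they start rank 0's output range); rank 1 sends [5, 8) to rank 0 with
 * secondtag and [8, 12) to rank 1 with firsttag.  Each receiver first waits for its firsttag
 * message and then drains secondtag messages from consecutively higher ranks until its output
 * range is full. */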

static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots = NULL, *buffer;
  PetscInt     i, j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  MPI_Comm_size(mapin->comm, &size);
  MPI_Comm_rank(mapin->comm, &rank);
  PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv);
  /* sort locally */
  PetscSortInt(mapin->n, keysin);
  /* get P - 1 pivots */
  PetscParallelSampleSelect(mapin, mapout, keysin, &pivots);
  /* determine which entries in the sorted array go to which other processes based on the pivots */
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i]  = prev;
    keys_per_snd[i] = j - prev;
  }
  offsets_snd[size - 1]  = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size]      = mapin->n;
  /* get the incoming sizes */
  MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm);
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscMalloc1(nrecv, &buffer);
  MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm);
  PetscFree(pivots);
  PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv);

  /* local sort */
  PetscSortInt(nrecv, buffer);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted);
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort result is not globally sorted before redistribution");
  }
#endif

  /* redistribute to the desired order */
  PetscParallelRedistribute(mapout, nrecv, buffer, keysout);
  PetscFree(buffer);
  return 0;
}
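/* Illustration (example values only) of the pivot-based bucketing above: with size = 4,
 * pivots = {10, 20, 30} and the locally sorted keysin = {3, 12, 18, 25, 40}, the scan
 * produces keys_per_snd = {1, 2, 1, 1} and offsets_snd = {0, 1, 3, 4, 5}: keys < 10 go to
 * rank 0, keys in [10, 20) to rank 1, keys in [20, 30) to rank 2, and the rest to rank 3. */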

/*@
  PetscParallelSortInt - Globally sort a distributed array of integers

  Collective

  Input Parameters:
+ mapin - `PetscLayout` describing the distribution of the input keys
. mapout - `PetscLayout` describing the desired distribution of the output keys
- keysin - the array of input keys (it does not need to be sorted on input)

  Output Parameter:
. keysout - the array in which the sorted integers will be stored.  If mapin == mapout, then keysin may be equal to keysout.

  Level: developer

  Notes:
  This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does the following:
.vb
  - sort the keys locally on each process
  - choose pivots by sorting (in parallel, with a bitonic sort) the (P - 1) pivot suggestions from each process and allgathering a subset of (P - 1) of them
  - use the pivots to repartition the keys with an all-to-all exchange
  - sort the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
  - redistribute to match the mapout layout
.ve

  If keysin != keysout, then keysin will not be changed during `PetscParallelSortInt()`.

.seealso: `PetscSortInt()`, `PetscParallelSortedInt()`
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size;
  PetscMPIInt result;
  PetscInt   *keysincopy = NULL;

  MPI_Comm_compare(mapin->comm, mapout->comm, &result);
  PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "mapin and mapout must be on congruent communicators");
  PetscLayoutSetUp(mapin);
  PetscLayoutSetUp(mapout);
  MPI_Comm_size(mapin->comm, &size);
  if (size == 1) {
    if (keysout != keysin) PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt));
    PetscSortInt(mapout->n, keysout);
    return 0;
  }
  if (keysout != keysin) {
    PetscMalloc1(mapin->n, &keysincopy);
    PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt));
    keysin = keysincopy;
  }
  PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted);
    PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "parallel sort result is not globally sorted");
  }
#endif
  PetscFree(keysincopy);
  return 0;
}
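
For reference, here is a minimal usage sketch. It is not part of psort.c: the driver below, with the names map, keys and sorted and the arbitrary key values, is illustrative only, and it assumes a PETSc build recent enough to provide the PetscCall()/PetscCallMPI() error-checking macros.

#include <petscis.h>

int main(int argc, char **argv)
{
  PetscLayout map;
  PetscInt   *keys, *sorted, i, n = 4;
  PetscMPIInt rank;

  PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  /* each rank owns n keys; here the same layout describes the input and the output distribution */
  PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &map));
  PetscCall(PetscLayoutSetLocalSize(map, n));
  PetscCall(PetscLayoutSetUp(map));
  PetscCall(PetscMalloc2(n, &keys, n, &sorted));
  for (i = 0; i < n; i++) keys[i] = (PetscInt)((rank * 7919 + i * 104729) % 1000); /* arbitrary unsorted values */
  PetscCall(PetscParallelSortInt(map, map, keys, sorted));
  /* sorted[] now holds a globally nondecreasing sequence distributed according to map */
  PetscCall(PetscFree2(keys, sorted));
  PetscCall(PetscLayoutDestroy(&map));
  PetscCall(PetscFinalize());
  return 0;
}

Distinct mapin and mapout layouts with the same global size work the same way; the sorted keys then land in the mapout distribution, as described in the manual page above.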