Actual source code: psort.c
#include <petsc/private/petscimpl.h>
#include <petscis.h>

/* This is the bitonic merge that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
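/* Each process holds n keys. A process and the partner `split` ranks away exchange their full arrays and keep the
   element-wise minimum on the low side of an ascending merge (maximum on the high side); the roles flip when
   forward is PETSC_FALSE. The recursion bottoms out in a plain local sort once only one rank remains. */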
static PetscErrorCode PetscParallelSortInt_Bitonic_Merge(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt split, mid, partner;

  diff = rankEnd - rankStart;
  if (diff <= 0) return 0;
  if (diff == 1) {
    if (forward) {
      PetscSortInt((PetscInt)n, keys);
    } else {
      PetscSortReverseInt((PetscInt)n, keys);
    }
    return 0;
  }
  split = 1;
  while (2 * split < diff) split *= 2;
  mid = rankStart + split;
  if (rank < mid) {
    partner = rank + split;
  } else {
    partner = rank - split;
  }
  if (partner < rankEnd) {
    PetscMPIInt i;

    MPI_Sendrecv(keys, n, MPIU_INT, partner, tag, buffer, n, MPIU_INT, partner, tag, comm, MPI_STATUS_IGNORE);
    if ((rank < partner) == (forward == PETSC_TRUE)) {
      for (i = 0; i < n; i++) keys[i] = (keys[i] <= buffer[i]) ? keys[i] : buffer[i];
    } else {
      for (i = 0; i < n; i++) keys[i] = (keys[i] > buffer[i]) ? keys[i] : buffer[i];
    }
  }
  /* divide and conquer */
  if (rank < mid) {
    PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, mid, rank, n, keys, buffer, forward);
  } else {
    PetscParallelSortInt_Bitonic_Merge(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);
  }
  return 0;
}

/* This is the bitonic sort that works on non-power-of-2 sizes found at http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/oddn.htm */
static PetscErrorCode PetscParallelSortInt_Bitonic_Recursive(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt rankStart, PetscMPIInt rankEnd, PetscMPIInt rank, PetscMPIInt n, PetscInt keys[], PetscInt buffer[], PetscBool forward)
{
  PetscInt diff;
  PetscInt mid;

  diff = rankEnd - rankStart;
  if (diff <= 0) return 0;
  if (diff == 1) {
    if (forward) {
      PetscSortInt(n, keys);
    } else {
      PetscSortReverseInt(n, keys);
    }
    return 0;
  }
  mid = rankStart + diff / 2;
  /* divide and conquer */
  if (rank < mid) {
    PetscParallelSortInt_Bitonic_Recursive(comm, tag, rankStart, mid, rank, n, keys, buffer, (PetscBool)!forward);
  } else {
    PetscParallelSortInt_Bitonic_Recursive(comm, tag, mid, rankEnd, rank, n, keys, buffer, forward);
  }
  /* bitonic merge */
  PetscParallelSortInt_Bitonic_Merge(comm, tag, rankStart, rankEnd, rank, n, keys, buffer, forward);
  return 0;
}

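/* Parallel bitonic sort of n keys per process over comm; the element-wise merge above assumes that every process
   passes the same n. In this file it is only called with n = size - 1, to sort the pivot proposals gathered in
   PetscParallelSampleSelect(). */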
static PetscErrorCode PetscParallelSortInt_Bitonic(MPI_Comm comm, PetscInt n, PetscInt keys[])
{
  PetscMPIInt size, rank, tag, mpin;
  PetscInt   *buffer;

  PetscCommGetNewTag(comm, &tag);
  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);
  PetscMPIIntCast(n, &mpin);
  PetscMalloc1(n, &buffer);
  PetscParallelSortInt_Bitonic_Recursive(comm, tag, 0, size, rank, mpin, keys, buffer, PETSC_TRUE);
  PetscFree(buffer);
  return 0;
}

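/* Choose P - 1 global pivots for the samplesort: every process proposes P - 1 pivots taken from its locally sorted
   keys at positions that mimic the target layout mapout, the proposals are sorted in parallel with the bitonic sort
   above, and P - 1 of them are selected and allgathered so that every process ends up with the same pivot array. */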
static PetscErrorCode PetscParallelSampleSelect(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt *outpivots[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots, *finalpivots, i;
  PetscInt     non_empty, my_first, count;
  PetscMPIInt *keys_per, max_keys_per;

  MPI_Comm_size(mapin->comm, &size);
  MPI_Comm_rank(mapin->comm, &rank);
  /* Choose P - 1 pivots that would be ideal for the distribution on this process */
  PetscMalloc1(size - 1, &pivots);
  for (i = 0; i < size - 1; i++) {
    PetscInt index;

    if (!mapin->n) {
      /* if this rank is empty, put "infinity" in the list. each process knows how many empty
       * processes are in the layout, so those values will be ignored from the end of the sorted
       * pivots */
      pivots[i] = PETSC_MAX_INT;
    } else {
      /* match the distribution to the desired output described by mapout by getting the index
       * that is approximately the appropriate fraction through the list */
      index     = ((PetscReal)mapout->range[i + 1]) * ((PetscReal)mapin->n) / ((PetscReal)mapout->N);
      index     = PetscMin(index, (mapin->n - 1));
      index     = PetscMax(index, 0);
      pivots[i] = keysin[index];
    }
  }
  /* sort the pivots in parallel */
  PetscParallelSortInt_Bitonic(mapin->comm, size - 1, pivots);
  if (PetscDefined(USE_DEBUG)) {
    PetscBool sorted;

    PetscParallelSortedInt(mapin->comm, size - 1, pivots, &sorted);
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "bitonic sort of the pivot proposals failed");
  }
  /* if there are Z nonempty processes, we have (P - 1) * Z real pivots, and we want to select
   * at indices Z - 1, 2*Z - 1, ... (P - 1) * Z - 1 */
  non_empty = size;
  for (i = 0; i < size; i++)
    if (mapout->range[i] == mapout->range[i + 1]) non_empty--;
  PetscCalloc1(size + 1, &keys_per);
  my_first = -1;
  if (non_empty) {
    for (i = 0; i < size - 1; i++) {
      size_t sample      = (i + 1) * non_empty - 1;
      size_t sample_rank = sample / (size - 1);

      keys_per[sample_rank]++;
      if (my_first < 0 && (PetscMPIInt)sample_rank == rank) my_first = (PetscInt)(sample - sample_rank * (size - 1));
    }
  }
  for (i = 0, max_keys_per = 0; i < size; i++) max_keys_per = PetscMax(keys_per[i], max_keys_per);
  PetscMalloc1(size * max_keys_per, &finalpivots);
  /* now that we know how many pivots each process will provide, gather the selected pivots at the start of the array
   * and allgather them */
  for (i = 0; i < keys_per[rank]; i++) pivots[i] = pivots[my_first + i * non_empty];
  for (i = keys_per[rank]; i < max_keys_per; i++) pivots[i] = PETSC_MAX_INT;
  MPI_Allgather(pivots, max_keys_per, MPIU_INT, finalpivots, max_keys_per, MPIU_INT, mapin->comm);
  for (i = 0, count = 0; i < size; i++) {
    PetscInt j;

    for (j = 0; j < max_keys_per; j++) {
      if (j < keys_per[i]) finalpivots[count++] = finalpivots[i * max_keys_per + j];
    }
  }
  *outpivots = finalpivots;
  PetscFree(keys_per);
  PetscFree(pivots);
  return 0;
}

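/* Redistribute the n local entries of arrayin, whose concatenation across processes (in rank order) is already
   globally sorted, into the layout described by map. Each receiving process posts a single wildcard receive for its
   whole output range: the sender holding the first entries of that range sends them with firsttag, and the rest of
   the range is filled by blocking receives from the following ranks, in rank order, with secondtag. A rank that has
   nothing to contribute but whose (empty) span falls strictly inside a target range still sends an empty secondtag
   message so that the receiver's rank-ordered receive loop does not stall. */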
static PetscErrorCode PetscParallelRedistribute(PetscLayout map, PetscInt n, PetscInt arrayin[], PetscInt arrayout[])
{
  PetscMPIInt  size, rank;
  PetscInt     myOffset, nextOffset;
  PetscInt     i;
  PetscMPIInt  total, filled;
  PetscMPIInt  sender, nfirst, nsecond;
  PetscMPIInt  firsttag, secondtag;
  MPI_Request  firstreqrcv;
  MPI_Request *firstreqs;
  MPI_Request *secondreqs;
  MPI_Status   firststatus;

  MPI_Comm_size(map->comm, &size);
  MPI_Comm_rank(map->comm, &rank);
  PetscCommGetNewTag(map->comm, &firsttag);
  PetscCommGetNewTag(map->comm, &secondtag);
  myOffset = 0;
  PetscMalloc2(size, &firstreqs, size, &secondreqs);
  MPI_Scan(&n, &nextOffset, 1, MPIU_INT, MPI_SUM, map->comm);
  myOffset = nextOffset - n;
  total    = map->range[rank + 1] - map->range[rank];
  if (total > 0) MPI_Irecv(arrayout, total, MPIU_INT, MPI_ANY_SOURCE, firsttag, map->comm, &firstreqrcv);
  for (i = 0, nsecond = 0, nfirst = 0; i < size; i++) {
    PetscInt itotal;
    PetscInt overlap, oStart, oEnd;

    itotal = map->range[i + 1] - map->range[i];
    if (itotal <= 0) continue;
    oStart  = PetscMax(myOffset, map->range[i]);
    oEnd    = PetscMin(nextOffset, map->range[i + 1]);
    overlap = oEnd - oStart;
    if (map->range[i] >= myOffset && map->range[i] < nextOffset) {
      /* send first message */
      MPI_Isend(&arrayin[map->range[i] - myOffset], overlap, MPIU_INT, i, firsttag, map->comm, &(firstreqs[nfirst++]));
    } else if (overlap > 0) {
      /* send second message */
      MPI_Isend(&arrayin[oStart - myOffset], overlap, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));
    } else if (overlap == 0 && myOffset > map->range[i] && myOffset < map->range[i + 1]) {
      /* send empty second message */
      MPI_Isend(&arrayin[oStart - myOffset], 0, MPIU_INT, i, secondtag, map->comm, &(secondreqs[nsecond++]));
    }
  }
  filled = 0;
  sender = -1;
  if (total > 0) {
    MPI_Wait(&firstreqrcv, &firststatus);
    sender = firststatus.MPI_SOURCE;
    MPI_Get_count(&firststatus, MPIU_INT, &filled);
  }
  while (filled < total) {
    PetscMPIInt mfilled;
    MPI_Status  stat;

    sender++;
    MPI_Recv(&arrayout[filled], total - filled, MPIU_INT, sender, secondtag, map->comm, &stat);
    MPI_Get_count(&stat, MPIU_INT, &mfilled);
    filled += mfilled;
  }
  MPI_Waitall(nfirst, firstreqs, MPI_STATUSES_IGNORE);
  MPI_Waitall(nsecond, secondreqs, MPI_STATUSES_IGNORE);
  PetscFree2(firstreqs, secondreqs);
  return 0;
}

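/* Samplesort driver: sort locally, pick P - 1 pivots with PetscParallelSampleSelect(), repartition the keys by those
   pivots with an all-to-all exchange, sort the received keys locally, and hand the result to
   PetscParallelRedistribute() to match the requested output layout. */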
static PetscErrorCode PetscParallelSortInt_Samplesort(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt  size, rank;
  PetscInt    *pivots = NULL, *buffer;
  PetscInt     i, j;
  PetscMPIInt *keys_per_snd, *keys_per_rcv, *offsets_snd, *offsets_rcv, nrecv;

  MPI_Comm_size(mapin->comm, &size);
  MPI_Comm_rank(mapin->comm, &rank);
  PetscMalloc4(size, &keys_per_snd, size, &keys_per_rcv, size + 1, &offsets_snd, size + 1, &offsets_rcv);
  /* sort locally */
  PetscSortInt(mapin->n, keysin);
  /* get P - 1 pivots */
  PetscParallelSampleSelect(mapin, mapout, keysin, &pivots);
  /* determine which entries in the sorted array go to which other processes based on the pivots */
  for (i = 0, j = 0; i < size - 1; i++) {
    PetscInt prev = j;

    while ((j < mapin->n) && (keysin[j] < pivots[i])) j++;
    offsets_snd[i]  = prev;
    keys_per_snd[i] = j - prev;
  }
  offsets_snd[size - 1]  = j;
  keys_per_snd[size - 1] = mapin->n - j;
  offsets_snd[size]      = mapin->n;
  /* get the incoming sizes */
  MPI_Alltoall(keys_per_snd, 1, MPI_INT, keys_per_rcv, 1, MPI_INT, mapin->comm);
  offsets_rcv[0] = 0;
  for (i = 0; i < size; i++) offsets_rcv[i + 1] = offsets_rcv[i] + keys_per_rcv[i];
  nrecv = offsets_rcv[size];
  /* all to all exchange */
  PetscMalloc1(nrecv, &buffer);
  MPI_Alltoallv(keysin, keys_per_snd, offsets_snd, MPIU_INT, buffer, keys_per_rcv, offsets_rcv, MPIU_INT, mapin->comm);
  PetscFree(pivots);
  PetscFree4(keys_per_snd, keys_per_rcv, offsets_snd, offsets_rcv);

  /* local sort */
  PetscSortInt(nrecv, buffer);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscParallelSortedInt(mapin->comm, nrecv, buffer, &sorted);
    PetscCheck(sorted, mapin->comm, PETSC_ERR_PLIB, "samplesort (pre-redistribute) sorting failed");
  }
#endif

  /* redistribute to the desired order */
  PetscParallelRedistribute(mapout, nrecv, buffer, keysout);
  PetscFree(buffer);
  return 0;
}

/*@
  PetscParallelSortInt - Globally sort a distributed array of integers

  Collective

  Input Parameters:
+ mapin  - `PetscLayout` describing the distribution of the input keys
. mapout - `PetscLayout` describing the desired distribution of the output keys
- keysin - the array of input integers (it does not need to be presorted)

  Output Parameter:
. keysout - the array in which the sorted integers will be stored. If mapin == mapout, then keysin may be equal to keysout.

  Level: developer

  Notes:
  This implements a distributed samplesort, which, with local array sizes n_in and n_out, global size N, and global number of processes P, does the following:
.vb
  - sorts locally
  - chooses pivots by sorting, in parallel with a bitonic sort, the (P-1) pivot suggestions from each process and allgathering a subset of (P-1) of them
  - uses the pivots to repartition the keys with an all-to-all exchange
  - sorts the repartitioned keys locally (the array is now globally sorted, but does not match the mapout layout)
  - redistributes the keys to match the mapout layout
.ve

  If keysin != keysout, then keysin will not be changed during `PetscParallelSortInt()`.
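
  Example Usage:
  A minimal sketch of a caller, where `nlocal` and `keysin` stand for the caller's local key count and key array
  (not part of this file), and the output layout is simply left for `PetscLayoutSetUp()` to balance:
.vb
  PetscLayout mapin, mapout;
  PetscInt    N, nout, *keysout;

  PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &mapin));
  PetscCall(PetscLayoutSetLocalSize(mapin, nlocal)); /* nlocal input keys on this process */
  PetscCall(PetscLayoutSetUp(mapin));
  PetscCall(PetscLayoutGetSize(mapin, &N));
  PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &mapout));
  PetscCall(PetscLayoutSetSize(mapout, N)); /* same global size, local sizes chosen by PETSc */
  PetscCall(PetscLayoutSetUp(mapout));
  PetscCall(PetscLayoutGetLocalSize(mapout, &nout));
  PetscCall(PetscMalloc1(nout, &keysout));
  PetscCall(PetscParallelSortInt(mapin, mapout, keysin, keysout));
  /* ... use the globally sorted keysout ... */
  PetscCall(PetscFree(keysout));
  PetscCall(PetscLayoutDestroy(&mapout));
  PetscCall(PetscLayoutDestroy(&mapin));
.ve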

.seealso: `PetscSortInt()`, `PetscParallelSortedInt()`
@*/
PetscErrorCode PetscParallelSortInt(PetscLayout mapin, PetscLayout mapout, PetscInt keysin[], PetscInt keysout[])
{
  PetscMPIInt size;
  PetscMPIInt result;
  PetscInt   *keysincopy = NULL;

  MPI_Comm_compare(mapin->comm, mapout->comm, &result);
  PetscCheck(result == MPI_IDENT || result == MPI_CONGRUENT, mapin->comm, PETSC_ERR_ARG_NOTSAMECOMM, "The input and output layouts must share a communicator");
  PetscLayoutSetUp(mapin);
  PetscLayoutSetUp(mapout);
  /* the two layouts must describe the same number of keys */
  PetscCheck(mapin->N == mapout->N, mapin->comm, PETSC_ERR_ARG_SIZ, "Input and output layouts have different global sizes (%" PetscInt_FMT " != %" PetscInt_FMT ")", mapin->N, mapout->N);
  MPI_Comm_size(mapin->comm, &size);
  if (size == 1) {
    if (keysout != keysin) PetscMemcpy(keysout, keysin, mapin->n * sizeof(PetscInt));
    PetscSortInt(mapout->n, keysout);
    return 0;
  }
  if (keysout != keysin) {
    PetscMalloc1(mapin->n, &keysincopy);
    PetscMemcpy(keysincopy, keysin, mapin->n * sizeof(PetscInt));
    keysin = keysincopy;
  }
  PetscParallelSortInt_Samplesort(mapin, mapout, keysin, keysout);
#if defined(PETSC_USE_DEBUG)
  {
    PetscBool sorted;

    PetscParallelSortedInt(mapout->comm, mapout->n, keysout, &sorted);
    PetscCheck(sorted, mapout->comm, PETSC_ERR_PLIB, "parallel sort failed");
  }
#endif
  PetscFree(keysincopy);
  return 0;
}