Actual source code: sortso.c
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5: {
6: PetscMPIInt l = *(PetscMPIInt *)left, r = *(PetscMPIInt *)right;
7: return (l < r) ? -1 : (l > r);
8: }
10: static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11: {
12: PetscInt l = *(PetscInt *)left, r = *(PetscInt *)right;
13: return (l < r) ? -1 : (l > r);
14: }
16: static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17: {
18: PetscReal l = *(PetscReal *)left, r = *(PetscReal *)right;
19: return (l < r) ? -1 : (l > r);
20: }
22: #define MIN_GALLOP_CONST_GLOBAL 8
23: static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24: typedef int (*CompFunc)(const void *, const void *, void *);
26: /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27: #if !defined(PETSC_USE_DEBUG) && defined(__GNUC__)
28: static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29: {
30: __builtin_memcpy(t, b, size);
31: __builtin_memmove(b, a, size);
32: __builtin_memcpy(a, t, size);
33: return;
34: }
36: static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37: {
38: __builtin_memcpy(t, ar, asize);
39: __builtin_memmove(ar, al, asize);
40: __builtin_memcpy(al, t, asize);
41: __builtin_memcpy(t, br, bsize);
42: __builtin_memmove(br, bl, bsize);
43: __builtin_memcpy(bl, t, bsize);
44: return;
45: }
47: static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
48: {
49: __builtin_memcpy(dest, src, size);
50: return;
51: }
53: static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54: {
55: __builtin_memcpy(adest, asrc, asize);
56: __builtin_memcpy(bdest, bsrc, bsize);
57: return;
58: }
60: static inline void Petsc_memmove(char *dest, const char *src, size_t size)
61: {
62: __builtin_memmove(dest, src, size);
63: return;
64: }
66: static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67: {
68: __builtin_memmove(adest, asrc, asize);
69: __builtin_memmove(bdest, bsrc, bsize);
70: return;
71: }
72: #else
73: static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74: {
75: PETSC_COMM_SELF, PetscMemcpy(t, b, size);
76: PETSC_COMM_SELF, PetscMemmove(b, a, size);
77: PETSC_COMM_SELF, PetscMemcpy(a, t, size);
78: return;
79: }
81: static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
82: {
83: PETSC_COMM_SELF, PetscMemcpy(t, ar, asize);
84: PETSC_COMM_SELF, PetscMemmove(ar, al, asize);
85: PETSC_COMM_SELF, PetscMemcpy(al, t, asize);
86: PETSC_COMM_SELF, PetscMemcpy(t, br, bsize);
87: PETSC_COMM_SELF, PetscMemmove(br, bl, bsize);
88: PETSC_COMM_SELF, PetscMemcpy(bl, t, bsize);
89: return;
90: }
92: static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
93: {
94: PETSC_COMM_SELF, PetscMemcpy(dest, src, size);
95: return;
96: }
98: static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
99: {
100: PETSC_COMM_SELF, PetscMemcpy(adest, asrc, asize);
101: PETSC_COMM_SELF, PetscMemcpy(bdest, bsrc, bsize);
102: return;
103: }
105: static inline void Petsc_memmove(char *dest, const char *src, size_t size)
106: {
107: PETSC_COMM_SELF, PetscMemmove(dest, src, size);
108: return;
109: }
111: static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
112: {
113: PETSC_COMM_SELF, PetscMemmove(adest, asrc, asize);
114: PETSC_COMM_SELF, PetscMemmove(bdest, bsrc, bsize);
115: return;
116: }
117: #endif
119: /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
120: x. Output also inclusive.
122: NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
124: {0,1,2,3,4,5}
126: when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
127: */
128: static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
129: {
130: PetscInt last = l, k = 1, mid, cur = l + 1;
132: *m = l;
133: PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft", r, l);
134: if ((*cmp)(x, arr + r * size, ctx) >= 0) {
135: *m = r;
136: return 0;
137: }
138: if ((*cmp)(x, (arr) + l * size, ctx) < 0 || PetscUnlikely(!(r - l))) return 0;
139: while (PETSC_TRUE) {
140: if (PetscUnlikely(cur > r)) {
141: cur = r;
142: break;
143: }
144: if ((*cmp)(x, (arr) + cur * size, ctx) < 0) break;
145: last = cur;
146: cur += (k <<= 1) + 1;
147: ++k;
148: }
149: /* standard binary search but take last 0 mid 0 cur 1 into account*/
150: while (cur > last + 1) {
151: mid = last + ((cur - last) >> 1);
152: if ((*cmp)(x, (arr) + mid * size, ctx) < 0) {
153: cur = mid;
154: } else {
155: last = mid;
156: }
157: }
158: *m = cur;
159: return 0;
160: }
162: /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
163: < x. Output also inclusive */
164: static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
165: {
166: PetscInt last = r, k = 1, mid, cur = r - 1;
168: *m = r;
169: PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight", r, l);
170: if ((*cmp)(x, arr + l * size, ctx) <= 0) {
171: *m = l;
172: return 0;
173: }
174: if ((*cmp)(x, (arr) + r * size, ctx) > 0 || PetscUnlikely(!(r - l))) return 0;
175: while (PETSC_TRUE) {
176: if (PetscUnlikely(cur < l)) {
177: cur = l;
178: break;
179: }
180: if ((*cmp)(x, (arr) + cur * size, ctx) > 0) break;
181: last = cur;
182: cur -= (k <<= 1) + 1;
183: ++k;
184: }
185: /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
186: while (last > cur + 1) {
187: mid = last - ((last - cur) >> 1);
188: if ((*cmp)(x, (arr) + mid * size, ctx) > 0) {
189: cur = mid;
190: } else {
191: last = mid;
192: }
193: }
194: *m = cur;
195: return 0;
196: }
198: /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
199: complete array, left is first index of left array, mid is first index of right array, right is last index of right
200: array */
201: static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
202: {
203: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
204: const PetscInt llen = mid - left;
206: Petsc_memcpy(tarr, arr + (left * size), llen * size);
207: while ((i < llen) && (j <= right)) {
208: if ((*cmp)(tarr + (i * size), arr + (j * size), ctx) < 0) {
209: Petsc_memcpy(arr + (k * size), tarr + (i * size), size);
210: ++k;
211: gallopright = 0;
212: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
213: PetscInt l1, l2, diff1, diff2;
214: do {
215: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
216: /* search temp for right[j], can move up to that of temp into arr immediately */
217: PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1);
218: diff1 = l1 - i;
219: Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size);
220: k += diff1;
221: i = l1;
222: /* search right for temp[i], can move up to that many of right into arr */
223: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2);
224: diff2 = l2 - j;
225: Petsc_memmove((arr) + k * size, (arr) + j * size, diff2 * size);
226: k += diff2;
227: j = l2;
228: if (i >= llen || j > right) break;
229: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
230: ++MIN_GALLOP_GLOBAL;
231: }
232: } else {
233: Petsc_memmove(arr + (k * size), arr + (j * size), size);
234: ++k;
235: gallopleft = 0;
236: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
237: PetscInt l1, l2, diff1, diff2;
238: do {
239: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
240: /* search right for temp[i], can move up to that many of right into arr */
241: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2);
242: diff2 = l2 - j;
243: Petsc_memmove(arr + (k * size), arr + (j * size), diff2 * size);
244: k += diff2;
245: j = l2;
246: /* search temp for right[j], can copy up to that of temp into arr immediately */
247: PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1);
248: diff1 = l1 - i;
249: Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size);
250: k += diff1;
251: i = l1;
252: if (i >= llen || j > right) break;
253: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
254: ++MIN_GALLOP_GLOBAL;
255: }
256: }
257: }
258: if (i < llen) Petsc_memcpy(arr + (k * size), tarr + (i * size), (llen - i) * size);
259: return 0;
260: }
262: static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
263: {
264: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
265: const PetscInt llen = mid - left;
267: Petsc_memcpy2(atarr, arr + (left * asize), llen * asize, btarr, barr + (left * bsize), llen * bsize);
268: while ((i < llen) && (j <= right)) {
269: if ((*cmp)(atarr + (i * asize), arr + (j * asize), ctx) < 0) {
270: Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), asize, barr + (k * bsize), btarr + (i * bsize), bsize);
271: ++k;
272: gallopright = 0;
273: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
274: PetscInt l1, l2, diff1, diff2;
275: do {
276: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
277: /* search temp for right[j], can move up to that of temp into arr immediately */
278: PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1);
279: diff1 = l1 - i;
280: Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize);
281: k += diff1;
282: i = l1;
283: /* search right for temp[i], can move up to that many of right into arr */
284: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2);
285: diff2 = l2 - j;
286: Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize);
287: k += diff2;
288: j = l2;
289: if (i >= llen || j > right) break;
290: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
291: ++MIN_GALLOP_GLOBAL;
292: }
293: } else {
294: Petsc_memmove2(arr + (k * asize), arr + (j * asize), asize, barr + (k * bsize), barr + (j * bsize), bsize);
295: ++k;
296: gallopleft = 0;
297: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
298: PetscInt l1, l2, diff1, diff2;
299: do {
300: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
301: /* search right for temp[i], can move up to that many of right into arr */
302: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2);
303: diff2 = l2 - j;
304: Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize);
305: k += diff2;
306: j = l2;
307: /* search temp for right[j], can copy up to that of temp into arr immediately */
308: PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1);
309: diff1 = l1 - i;
310: Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize);
311: k += diff1;
312: i = l1;
313: if (i >= llen || j > right) break;
314: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
315: ++MIN_GALLOP_GLOBAL;
316: }
317: }
318: }
319: if (i < llen) Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), (llen - i) * asize, barr + (k * bsize), btarr + (i * bsize), (llen - i) * bsize);
320: return 0;
321: }
323: /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
324: complete array, left is first index of left array, mid is first index of right array, right is last index of right
325: array */
326: static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
327: {
328: PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0;
329: const PetscInt rlen = right - mid + 1;
331: Petsc_memcpy(tarr, (arr) + mid * size, rlen * size);
332: while ((i >= 0) && (j >= left)) {
333: if ((*cmp)((tarr) + i * size, (arr) + j * size, ctx) > 0) {
334: Petsc_memcpy((arr) + k * size, (tarr) + i * size, size);
335: --k;
336: gallopleft = 0;
337: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
338: PetscInt l1, l2, diff1, diff2;
339: do {
340: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
341: /* search temp for left[j], can copy up to that many of temp into arr */
342: PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1);
343: diff1 = i - l1;
344: Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size);
345: k -= diff1;
346: i = l1;
347: /* search left for temp[i], can move up to that many of left up arr */
348: PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2);
349: diff2 = j - l2;
350: Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size);
351: k -= diff2;
352: j = l2;
353: if (i < 0 || j < left) break;
354: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
355: ++MIN_GALLOP_GLOBAL;
356: }
357: } else {
358: Petsc_memmove((arr) + k * size, (arr) + j * size, size);
359: --k;
360: gallopright = 0;
361: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
362: PetscInt l1, l2, diff1, diff2;
363: do {
364: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
365: /* search left for temp[i], can move up to that many of left up arr */
366: PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2);
367: diff2 = j - l2;
368: Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size);
369: k -= diff2;
370: j = l2;
371: /* search temp for left[j], can copy up to that many of temp into arr */
372: PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1);
373: diff1 = i - l1;
374: Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size);
375: k -= diff1;
376: i = l1;
377: if (i < 0 || j < left) break;
378: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
379: ++MIN_GALLOP_GLOBAL;
380: }
381: }
382: }
383: if (i >= 0) Petsc_memcpy((arr) + left * size, tarr, (i + 1) * size);
384: return 0;
385: }
387: static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
388: {
389: PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0;
390: const PetscInt rlen = right - mid + 1;
392: Petsc_memcpy2(atarr, (arr) + mid * asize, rlen * asize, btarr, (barr) + mid * bsize, rlen * bsize);
393: while ((i >= 0) && (j >= left)) {
394: if ((*cmp)((atarr) + i * asize, (arr) + j * asize, ctx) > 0) {
395: Petsc_memcpy2((arr) + k * asize, (atarr) + i * asize, asize, (barr) + k * bsize, (btarr) + i * bsize, bsize);
396: --k;
397: gallopleft = 0;
398: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
399: PetscInt l1, l2, diff1, diff2;
400: do {
401: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
402: /* search temp for left[j], can copy up to that many of temp into arr */
403: PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1);
404: diff1 = i - l1;
405: Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize);
406: k -= diff1;
407: i = l1;
408: /* search left for temp[i], can move up to that many of left up arr */
409: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2);
410: diff2 = j - l2;
411: Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize);
412: k -= diff2;
413: j = l2;
414: if (i < 0 || j < left) break;
415: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
416: ++MIN_GALLOP_GLOBAL;
417: }
418: } else {
419: Petsc_memmove2((arr) + k * asize, (arr) + j * asize, asize, (barr) + k * bsize, (barr) + j * bsize, bsize);
420: --k;
421: gallopright = 0;
422: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
423: PetscInt l1, l2, diff1, diff2;
424: do {
425: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
426: /* search left for temp[i], can move up to that many of left up arr */
427: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2);
428: diff2 = j - l2;
429: Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize);
430: k -= diff2;
431: j = l2;
432: /* search temp for left[j], can copy up to that many of temp into arr */
433: PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1);
434: diff1 = i - l1;
435: Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize);
436: k -= diff1;
437: i = l1;
438: if (i < 0 || j < left) break;
439: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
440: ++MIN_GALLOP_GLOBAL;
441: }
442: }
443: }
444: if (i >= 0) Petsc_memcpy2((arr) + left * asize, atarr, (i + 1) * asize, (barr) + left * bsize, btarr, (i + 1) * bsize);
445: return 0;
446: }
448: /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
449: bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
450: static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
451: {
452: PetscInt i = start == left ? start + 1 : start;
454: for (; i <= right; ++i) {
455: PetscInt j = i - 1;
456: Petsc_memcpy(tarr, arr + (i * size), size);
457: while ((j >= left) && ((*cmp)(tarr, (arr) + j * size, ctx) < 0)) {
458: COPYSWAPPY(arr + (j + 1) * size, arr + j * size, tarr + size, size);
459: --j;
460: }
461: Petsc_memcpy((arr) + (j + 1) * size, tarr, size);
462: }
463: return 0;
464: }
466: static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
467: {
468: PetscInt i = start == left ? start + 1 : start;
470: for (; i <= right; ++i) {
471: PetscInt j = i - 1;
472: Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize);
473: while ((j >= left) && ((*cmp)(atarr, arr + (j * asize), ctx) < 0)) {
474: COPYSWAPPY2(arr + (j + 1) * asize, arr + j * asize, asize, barr + (j + 1) * bsize, barr + j * bsize, bsize, atarr + asize);
475: --j;
476: }
477: Petsc_memcpy2(arr + (j + 1) * asize, atarr, asize, barr + (j + 1) * bsize, btarr, bsize);
478: }
479: return 0;
480: }
482: /* See PetscInsertionSort_Private */
483: static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
484: {
485: PetscInt i = start == left ? start + 1 : start;
487: for (; i <= right; ++i) {
488: PetscInt l = left, r = i;
489: Petsc_memcpy(tarr, arr + (i * size), size);
490: do {
491: const PetscInt m = l + ((r - l) >> 1);
492: if ((*cmp)(tarr, arr + (m * size), ctx) < 0) {
493: r = m;
494: } else {
495: l = m + 1;
496: }
497: } while (l < r);
498: Petsc_memmove(arr + ((l + 1) * size), arr + (l * size), (i - l) * size);
499: Petsc_memcpy(arr + (l * size), tarr, size);
500: }
501: return 0;
502: }
504: static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
505: {
506: PetscInt i = start == left ? start + 1 : start;
508: for (; i <= right; ++i) {
509: PetscInt l = left, r = i;
510: Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize);
511: do {
512: const PetscInt m = l + ((r - l) >> 1);
513: if ((*cmp)(atarr, arr + (m * asize), ctx) < 0) {
514: r = m;
515: } else {
516: l = m + 1;
517: }
518: } while (l < r);
519: Petsc_memmove2(arr + ((l + 1) * asize), arr + (l * asize), (i - l) * asize, barr + ((l + 1) * bsize), barr + (l * bsize), (i - l) * bsize);
520: Petsc_memcpy2(arr + (l * asize), atarr, asize, barr + (l * bsize), btarr, bsize);
521: }
522: return 0;
523: }
525: typedef struct {
526: PetscInt size;
527: PetscInt start;
528: #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
529: } PetscTimSortStack;
530: #else
531: } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2 * sizeof(PetscInt));
532: #endif
534: typedef struct {
535: char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
536: size_t size;
537: size_t maxsize;
538: } PetscTimSortBuffer;
540: static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
541: {
542: if (PetscLikely(newSize <= buff->size)) return 0;
543: {
544: /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
545: size_t newMax = PetscMin(newSize * newSize, buff->maxsize);
546: PetscFree(buff->ptr);
547: PetscMalloc1(newMax, &buff->ptr);
548: buff->size = newMax;
549: }
550: return 0;
551: }
553: static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
554: {
555: for (; stacksize; --stacksize) {
556: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
557: if ((*cmp)(arr + (stack[stacksize].start - 1) * size, arr + (stack[stacksize].start) * size, ctx) > 0) {
558: PetscInt l, m = stack[stacksize].start, r;
559: /* Search A for B[0] insertion */
560: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * size, &l);
561: /* Search B for A[-1] insertion */
562: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * size, &r);
563: if (m - l <= r - m) {
564: PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size);
565: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
566: } else {
567: PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size);
568: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
569: }
570: }
571: /* Update A with merge */
572: stack[stacksize - 1].size += stack[stacksize].size;
573: }
574: return 0;
575: }
577: static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
578: {
579: for (; stacksize; --stacksize) {
580: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
581: if ((*cmp)(arr + (stack[stacksize].start - 1) * asize, arr + (stack[stacksize].start) * asize, ctx) > 0) {
582: PetscInt l, m = stack[stacksize].start, r;
583: /* Search A for B[0] insertion */
584: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * asize, &l);
585: /* Search B for A[-1] insertion */
586: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * asize, &r);
587: if (m - l <= r - m) {
588: PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize);
589: PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize);
590: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
591: } else {
592: PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize);
593: PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize);
594: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
595: }
596: }
597: /* Update A with merge */
598: stack[stacksize - 1].size += stack[stacksize].size;
599: }
600: return 0;
601: }
603: static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
604: {
605: PetscInt i = *stacksize;
607: while (i) {
608: PetscInt l, m, r, itemp = i;
610: if (i == 1) {
611: /* A = stack[i-1], B = stack[i] */
612: if (stack[i - 1].size < stack[i].size) {
613: /* if A[-1] <= B[0] then sorted */
614: if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) {
615: m = stack[i].start;
616: /* Search A for B[0] insertion */
617: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * size, &l);
618: /* Search B for A[-1] insertion */
619: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * size, &r);
620: if (m - l <= r - m) {
621: PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size);
622: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
623: } else {
624: PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size);
625: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
626: }
627: }
628: /* Update A with merge */
629: stack[i - 1].size += stack[i].size;
630: --i;
631: }
632: } else {
633: /* i > 2, i.e. C exists
634: A = stack[i-2], B = stack[i-1], C = stack[i]; */
635: if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) {
636: if (stack[i - 2].size < stack[i].size) {
637: /* merge B into A, but if A[-1] <= B[0] then already sorted */
638: if ((*cmp)(arr + (stack[i - 1].start - 1) * size, arr + (stack[i - 1].start) * size, ctx) > 0) {
639: m = stack[i - 1].start;
640: /* Search A for B[0] insertion */
641: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * size, &l);
642: /* Search B for A[-1] insertion */
643: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * size, &r);
644: if (m - l <= r - m) {
645: PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size);
646: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
647: } else {
648: PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size);
649: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
650: }
651: }
652: /* Update A with merge */
653: stack[i - 2].size += stack[i - 1].size;
654: /* Push C up the stack */
655: stack[i - 1].start = stack[i].start;
656: stack[i - 1].size = stack[i].size;
657: } else {
658: /* merge C into B */
659: mergeBC:
660: /* If B[-1] <= C[0] then... you know the drill */
661: if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) {
662: m = stack[i].start;
663: /* Search B for C[0] insertion */
664: PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * size, &l);
665: /* Search C for B[-1] insertion */
666: PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * size, &r);
667: if (m - l <= r - m) {
668: PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size);
669: PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
670: } else {
671: PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size);
672: PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
673: }
674: }
675: /* Update B with merge */
676: stack[i - 1].size += stack[i].size;
677: }
678: --i;
679: } else if (stack[i - 1].size <= stack[i].size) {
680: /* merge C into B */
681: goto mergeBC;
682: }
683: }
684: if (itemp == i) break;
685: }
686: *stacksize = i;
687: return 0;
688: }
690: static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
691: {
692: PetscInt i = *stacksize;
694: while (i) {
695: PetscInt l, m, r, itemp = i;
697: if (i == 1) {
698: /* A = stack[i-1], B = stack[i] */
699: if (stack[i - 1].size < stack[i].size) {
700: /* if A[-1] <= B[0] then sorted */
701: if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) {
702: m = stack[i].start;
703: /* Search A for B[0] insertion */
704: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * asize, &l);
705: /* Search B for A[-1] insertion */
706: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * asize, &r);
707: if (m - l <= r - m) {
708: PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize);
709: PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize);
710: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
711: } else {
712: PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize);
713: PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize);
714: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
715: }
716: }
717: /* Update A with merge */
718: stack[i - 1].size += stack[i].size;
719: --i;
720: }
721: } else {
722: /* i > 2, i.e. C exists
723: A = stack[i-2], B = stack[i-1], C = stack[i]; */
724: if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) {
725: if (stack[i - 2].size < stack[i].size) {
726: /* merge B into A, but if A[-1] <= B[0] then already sorted */
727: if ((*cmp)(arr + (stack[i - 1].start - 1) * asize, arr + (stack[i - 1].start) * asize, ctx) > 0) {
728: m = stack[i - 1].start;
729: /* Search A for B[0] insertion */
730: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * asize, &l);
731: /* Search B for A[-1] insertion */
732: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * asize, &r);
733: if (m - l <= r - m) {
734: PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize);
735: PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize);
736: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
737: } else {
738: PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize);
739: PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize);
740: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
741: }
742: }
743: /* Update A with merge */
744: stack[i - 2].size += stack[i - 1].size;
745: /* Push C up the stack */
746: stack[i - 1].start = stack[i].start;
747: stack[i - 1].size = stack[i].size;
748: } else {
749: /* merge C into B */
750: mergeBC:
751: /* If B[-1] <= C[0] then... you know the drill */
752: if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) {
753: m = stack[i].start;
754: /* Search B for C[0] insertion */
755: PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * asize, &l);
756: /* Search C for B[-1] insertion */
757: PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * asize, &r);
758: if (m - l <= r - m) {
759: PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize);
760: PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize);
761: PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
762: } else {
763: PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize);
764: PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize);
765: PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
766: }
767: }
768: /* Update B with merge */
769: stack[i - 1].size += stack[i].size;
770: }
771: --i;
772: } else if (stack[i - 1].size <= stack[i].size) {
773: /* merge C into B */
774: goto mergeBC;
775: }
776: }
777: if (itemp == i) break;
778: }
779: *stacksize = i;
780: return 0;
781: }
783: /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
784: elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
785: binary insertion sort or regulat insertion sort */
786: static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
787: {
788: const PetscInt re = PetscMin(runstart + minrun, n - 1);
789: PetscInt ri = runstart;
791: if (PetscUnlikely(runstart == n - 1)) {
792: *runend = runstart;
793: return 0;
794: }
795: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
796: if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) {
797: ++ri;
798: while (ri < n - 1) {
799: if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) >= 0) break;
800: ++ri;
801: }
802: {
803: PetscInt lo = runstart, hi = ri;
804: for (; lo < hi; ++lo, --hi) COPYSWAPPY(arr + lo * size, arr + hi * size, tarr, size);
805: }
806: } else {
807: ++ri;
808: while (ri < n - 1) {
809: if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) break;
810: ++ri;
811: }
812: }
813: if (ri < re) {
814: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
815: binary search */
816: if (ri - runstart <= minrun >> 1) {
817: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
818: PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
819: } else {
820: PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
821: }
822: *runend = re;
823: } else *runend = ri;
824: return 0;
825: }
827: static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
828: {
829: const PetscInt re = PetscMin(runstart + minrun, n - 1);
830: PetscInt ri = runstart;
832: if (PetscUnlikely(runstart == n - 1)) {
833: *runend = runstart;
834: return 0;
835: }
836: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
837: if ((*cmp)((arr) + (ri + 1) * asize, arr + (ri * asize), ctx) < 0) {
838: ++ri;
839: while (ri < n - 1) {
840: if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) >= 0) break;
841: ++ri;
842: }
843: {
844: PetscInt lo = runstart, hi = ri;
845: for (; lo < hi; ++lo, --hi) COPYSWAPPY2(arr + lo * asize, arr + hi * asize, asize, barr + lo * bsize, barr + hi * bsize, bsize, atarr);
846: }
847: } else {
848: ++ri;
849: while (ri < n - 1) {
850: if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) < 0) break;
851: ++ri;
852: }
853: }
854: if (ri < re) {
855: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
856: binary search */
857: if (ri - runstart <= minrun >> 1) {
858: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
859: PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
860: } else {
861: PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
862: }
863: *runend = re;
864: } else *runend = ri;
865: return 0;
866: }
868: /*@C
869: PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
871: Not Collective
873: Input Parameters:
874: + n - number of values
875: . arr - array to be sorted
876: . size - size in bytes of the datatype held in arr
877: . cmp - function pointer to comparison function
878: - ctx - optional context to be passed to comparison function, NULL if not needed
880: Output Parameters:
881: . arr - sorted array
883: Notes:
884: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
885: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
886: select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
887: do so it repeatedly triggers attempts throughout to merge adjacent runs.
889: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
890: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
891: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
892: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
893: likely all contain similar data.
895: Sample usage:
896: The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference
897: between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
898: may also
899: change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
900: the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
901: order
903: .vb
904: int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
905: my_type l = *(my_type *) left, r = *(my_type *) right;
906: return (l < r) ? -1 : (l > r);
907: }
908: .ve
909: Note the context is unused here but you may use it to pass and subsequently access whatever information required
910: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
911: Then pass the function
912: .vb
913: PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
914: .ve
916: Fortran Notes:
917: To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
918: returns result. For example
919: .vb
920: subroutine CompareIntegers(left,right,ctx,result)
921: implicit none
923: PetscInt,intent(in) :: left, right
924: type(UserCtx) :: ctx
925: integer,intent(out) :: result
927: if (left < right) then
928: result = -1
929: else if (left == right) then
930: result = 0
931: else
932: result = 1
933: end if
934: return
935: end subroutine CompareIntegers
936: .ve
938: References:
939: . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt
941: Level: developer
943: .seealso: `PetscTimSortWithArray()`, `PetscIntSortSemiOrdered()`, `PetscRealSortSemiOrdered()`, `PetscMPIIntSortSemiOrdered()`
944: @*/
945: PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
946: {
947: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
948: PetscTimSortStack runstack[128];
949: PetscTimSortBuffer buff;
950: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
951: It is so unlikely that this limit is reached that this is __never__ checked for */
953: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
954: is a power of 2 or one plus a power of 2 */
955: {
956: PetscInt t = n, r = 0;
957: /* r becomes 1 if the least significant bits contain at least one off bit */
958: while (t >= 64) {
959: r |= t & 1;
960: t >>= 1;
961: }
962: minrun = t + r;
963: }
964: if (PetscDefined(USE_DEBUG)) {
965: PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun);
966: if (n < 64) {
967: PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n);
969: }
970: PetscMalloc1((size_t)minrun * size, &buff.ptr);
971: buff.size = (size_t)minrun * size;
972: buff.maxsize = (size_t)n * size;
973: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
974: while (runstart < n) {
975: /* Check if additional entries are at least partially ordered and build natural run */
976: PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);
977: runstack[stacksize].start = runstart;
978: runstack[stacksize].size = runend - runstart + 1;
979: PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);
980: ++stacksize;
981: runstart = runend + 1;
982: }
983: /* Have been inside while, so discard last stacksize++ */
984: --stacksize;
985: PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);
986: PetscFree(buff.ptr);
987: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
988: return 0;
989: }
991: /*@C
992: PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
993: reorders a second array to match the first. The arrays need not be the same type.
995: Not Collective
997: Input Parameters:
998: + n - number of values
999: . asize - size in bytes of the datatype held in arr
1000: . bsize - size in bytes of the datatype held in barr
1001: . cmp - function pointer to comparison function
1002: - ctx - optional context to be passed to comparison function, NULL if not needed
1004: Input/Output Parameters:
1005: + arr - array to be sorted, on output it is sorted
1006: - barr - array to be reordered, on output it is reordered
1008: Notes:
1009: The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1010: overlap.
1012: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1013: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1014: to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1015: arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1017: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1018: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1019: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1020: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1021: likely all contain similar data.
1023: Sample usage:
1024: The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference
1025: between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1026: may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1027: guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1028: increasing order
1030: .vb
1031: int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1032: my_type l = *(my_type *) left, r = *(my_type *) right;
1033: return (l < r) ? -1 : (l > r);
1034: }
1035: .ve
1036: Note the context is unused here but you may use it to pass and subsequently access whatever information required
1037: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1038: Then pass the function
1039: .vb
1040: PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1041: .ve
1043: Fortran Notes:
1044: To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1045: returns result. For example
1046: .vb
1047: subroutine CompareIntegers(left,right,ctx,result)
1048: implicit none
1050: PetscInt,intent(in) :: left, right
1051: type(UserCtx) :: ctx
1052: integer,intent(out) :: result
1054: if (left < right) then
1055: result = -1
1056: else if (left == right) then
1057: result = 0
1058: else
1059: result = 1
1060: end if
1061: return
1062: end subroutine CompareIntegers
1063: .ve
1065: References:
1066: . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1068: Level: developer
1070: .seealso: `PetscTimSort()`, `PetscIntSortSemiOrderedWithArray()`, `PetscRealSortSemiOrderedWithArrayInt()`, `PetscMPIIntSortSemiOrderedWithArray()`
1071: @*/
1072: PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1073: {
1074: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
1075: PetscTimSortStack runstack[128];
1076: PetscTimSortBuffer abuff, bbuff;
1077: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1078: It is so unlikely that this limit is reached that this is __never__ checked for */
1080: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1081: is a power of 2 or one plus a power of 2 */
1082: {
1083: PetscInt t = n, r = 0;
1084: /* r becomes 1 if the least significant bits contain at least one off bit */
1085: while (t >= 64) {
1086: r |= t & 1;
1087: t >>= 1;
1088: }
1089: minrun = t + r;
1090: }
1091: if (PetscDefined(USE_DEBUG)) {
1092: PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun);
1094: }
1095: PetscMalloc1((size_t)minrun * asize, &abuff.ptr);
1096: abuff.size = (size_t)minrun * asize;
1097: abuff.maxsize = (size_t)n * asize;
1098: PetscMalloc1((size_t)minrun * bsize, &bbuff.ptr);
1099: bbuff.size = (size_t)minrun * bsize;
1100: bbuff.maxsize = (size_t)n * bsize;
1101: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1102: while (runstart < n) {
1103: /* Check if additional entries are at least partially ordered and build natural run */
1104: PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);
1105: runstack[stacksize].start = runstart;
1106: runstack[stacksize].size = runend - runstart + 1;
1107: PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);
1108: ++stacksize;
1109: runstart = runend + 1;
1110: }
1111: /* Have been inside while, so discard last stacksize++ */
1112: --stacksize;
1113: PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);
1114: PetscFree(abuff.ptr);
1115: PetscFree(bbuff.ptr);
1116: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1117: return 0;
1118: }
1120: /*@
1121: PetscIntSortSemiOrdered - Sorts an array of `PetscInt` in place in increasing order.
1123: Not Collective
1125: Input Parameters:
1126: + n - number of values
1127: - arr - array of integers
1129: Output Parameters:
1130: . arr - sorted array of integers
1132: Notes:
1133: If the array is less than 64 entries long `PetscSortInt()` is automatically used.
1135: This function serves as an alternative to `PetscSortInt()`. While this function works for any array of integers it is
1136: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1137: recommended that the user benchmark their code to see which routine is fastest.
1139: Level: intermediate
1141: .seealso: `PetscTimSort()`, `PetscSortInt()`, `PetscSortIntWithPermutation()`
1142: @*/
1143: PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1144: {
1145: if (n <= 1) return 0;
1147: if (n < 64) {
1148: PetscSortInt(n, arr);
1149: } else {
1150: PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1151: }
1152: return 0;
1153: }
1155: /*@
1156: PetscIntSortSemiOrderedWithArray - Sorts an array of `PetscInt` in place in increasing order and reorders a second
1157: `PetscInt` array to match the first.
1159: Not Collective
1161: Input Parameter:
1162: . n - number of values
1164: Input/Output Parameters:
1165: + arr1 - array of integers to be sorted, modified on output
1166: - arr2 - array of integers to be reordered, modified on output
1168: Notes:
1169: The arrays CANNOT overlap.
1171: This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1172: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1173: recommended that the user benchmark their code to see which routine is fastest.
1175: Level: intermediate
1177: .seealso: `PetscTimSortWithArray()`, `PetscSortIntWithArray()`, `PetscSortIntWithPermutation()`
1178: @*/
1179: PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1180: {
1181: if (n <= 1) return 0;
1184: /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1185: PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1186: return 0;
1187: }
1189: /*@
1190: PetscMPIIntSortSemiOrdered - Sorts an array of `PetscMPIInt` in place in increasing order.
1192: Not Collective
1194: Input Parameters:
1195: + n - number of values
1196: - arr - array of `PetscMPIInt`
1198: Output Parameters:
1199: . arr - sorted array of integers
1201: Notes:
1202: If the array is less than 64 entries long `PetscSortMPIInt()` is automatically used.
1204: This function serves as an alternative to `PetscSortMPIInt()`. While this function works for any array of `PetscMPIInt` it is
1205: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1206: recommended that the user benchmark their code to see which routine is fastest.
1208: Level: intermediate
1210: .seealso: `PetscTimSort()`, `PetscSortMPIInt()`
1211: @*/
1212: PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1213: {
1214: if (n <= 1) return 0;
1216: if (n < 64) {
1217: PetscSortMPIInt(n, arr);
1218: } else {
1219: PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1220: }
1221: return 0;
1222: }
1224: /*@
1225: PetscMPIIntSortSemiOrderedWithArray - Sorts an array of `PetscMPIInt` in place in increasing order and reorders a second `PetscMPIInt`
1226: array to match the first.
1228: Not Collective
1230: Input Parameter:
1231: . n - number of values
1233: Input/Output Parameters:
1234: . arr1 - array of integers to be sorted, modified on output
1235: - arr2 - array of integers to be reordered, modified on output
1237: Notes:
1238: The arrays CANNOT overlap.
1240: This function serves as an alternative to `PetscSortMPIIntWithArray()`. While this function works for any array of integers it is
1241: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1242: recommended that the user benchmark their code to see which routine is fastest.
1244: Level: intermediate
1246: .seealso: `PetscTimSortWithArray()`, `PetscSortMPIIntWithArray()`, `PetscSortMPIIntWithPermutation()`
1247: @*/
1248: PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1249: {
1250: if (n <= 1) return 0;
1253: /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1254: PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1255: return 0;
1256: }
1258: /*@
1259: PetscRealSortSemiOrdered - Sorts an array of `PetscReal` in place in increasing order.
1261: Not Collective
1263: Input Parameters:
1264: + n - number of values
1265: - arr - array of `PetscReal`
1267: Output Parameters:
1268: . arr - sorted array of integers
1270: Notes:
1271: If the array is less than 64 entries long `PetscSortReal()` is automatically used.
1273: This function serves as an alternative to `PetscSortReal()`. While this function works for any array of `PetscReal` it is
1274: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1275: recommended that the user benchmark their code to see which routine is fastest.
1277: Level: intermediate
1279: .seealso: `PetscTimSort()`, `PetscSortReal()`, `PetscSortRealWithPermutation()`
1280: @*/
1281: PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1282: {
1283: if (n <= 1) return 0;
1285: if (n < 64) {
1286: PetscSortReal(n, arr);
1287: } else {
1288: PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);
1289: }
1290: return 0;
1291: }
1293: /*@
1294: PetscRealSortSemiOrderedWithArrayInt - Sorts an array of `PetscReal` in place in increasing order and reorders a second
1295: array of `PetscInt` to match the first.
1297: Not Collective
1299: Input Parameter:
1300: . n - number of values
1302: Input/Output Parameters:
1303: . arr1 - array of `PetscReal` to be sorted, modified on output
1304: - arr2 - array of `PetscInt` to be reordered, modified on output
1306: Notes:
1307: This function serves as an alternative to `PetscSortRealWithArray()`. While this function works for any array of `PetscReal` it is
1308: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1309: recommended that the user benchmark their code to see which routine is fastest.
1311: Level: intermediate
1313: .seealso: `PetscTimSortWithArray()`, `PetscSortRealWithArrayInt()`, `PetscSortRealWithPermutation()`
1314: @*/
1315: PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1316: {
1317: if (n <= 1) return 0;
1320: /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1321: PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);
1322: return 0;
1323: }