Actual source code: sortso.c

  1: #include <petscsys.h>
  2: #include <petsc/private/petscimpl.h>

  4: static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
  5: {
  6:   PetscMPIInt l = *(PetscMPIInt *)left, r = *(PetscMPIInt *)right;
  7:   return (l < r) ? -1 : (l > r);
  8: }

 10: static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
 11: {
 12:   PetscInt l = *(PetscInt *)left, r = *(PetscInt *)right;
 13:   return (l < r) ? -1 : (l > r);
 14: }

 16: static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
 17: {
 18:   PetscReal l = *(PetscReal *)left, r = *(PetscReal *)right;
 19:   return (l < r) ? -1 : (l > r);
 20: }

 22: #define MIN_GALLOP_CONST_GLOBAL 8
 23: static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
 24: typedef int (*CompFunc)(const void *, const void *, void *);

 26: /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
 27: #if !defined(PETSC_USE_DEBUG) && defined(__GNUC__)
 28: static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
 29: {
 30:   __builtin_memcpy(t, b, size);
 31:   __builtin_memmove(b, a, size);
 32:   __builtin_memcpy(a, t, size);
 33:   return;
 34: }

 36: static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
 37: {
 38:   __builtin_memcpy(t, ar, asize);
 39:   __builtin_memmove(ar, al, asize);
 40:   __builtin_memcpy(al, t, asize);
 41:   __builtin_memcpy(t, br, bsize);
 42:   __builtin_memmove(br, bl, bsize);
 43:   __builtin_memcpy(bl, t, bsize);
 44:   return;
 45: }

 47: static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
 48: {
 49:   __builtin_memcpy(dest, src, size);
 50:   return;
 51: }

 53: static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
 54: {
 55:   __builtin_memcpy(adest, asrc, asize);
 56:   __builtin_memcpy(bdest, bsrc, bsize);
 57:   return;
 58: }

 60: static inline void Petsc_memmove(char *dest, const char *src, size_t size)
 61: {
 62:   __builtin_memmove(dest, src, size);
 63:   return;
 64: }

 66: static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
 67: {
 68:   __builtin_memmove(adest, asrc, asize);
 69:   __builtin_memmove(bdest, bsrc, bsize);
 70:   return;
 71: }
 72: #else
 73: static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
 74: {
 75:   PETSC_COMM_SELF, PetscMemcpy(t, b, size);
 76:   PETSC_COMM_SELF, PetscMemmove(b, a, size);
 77:   PETSC_COMM_SELF, PetscMemcpy(a, t, size);
 78:   return;
 79: }

 81: static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
 82: {
 83:   PETSC_COMM_SELF, PetscMemcpy(t, ar, asize);
 84:   PETSC_COMM_SELF, PetscMemmove(ar, al, asize);
 85:   PETSC_COMM_SELF, PetscMemcpy(al, t, asize);
 86:   PETSC_COMM_SELF, PetscMemcpy(t, br, bsize);
 87:   PETSC_COMM_SELF, PetscMemmove(br, bl, bsize);
 88:   PETSC_COMM_SELF, PetscMemcpy(bl, t, bsize);
 89:   return;
 90: }

 92: static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
 93: {
 94:   PETSC_COMM_SELF, PetscMemcpy(dest, src, size);
 95:   return;
 96: }

 98: static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
 99: {
100:   PETSC_COMM_SELF, PetscMemcpy(adest, asrc, asize);
101:   PETSC_COMM_SELF, PetscMemcpy(bdest, bsrc, bsize);
102:   return;
103: }

105: static inline void Petsc_memmove(char *dest, const char *src, size_t size)
106: {
107:   PETSC_COMM_SELF, PetscMemmove(dest, src, size);
108:   return;
109: }

111: static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
112: {
113:   PETSC_COMM_SELF, PetscMemmove(adest, asrc, asize);
114:   PETSC_COMM_SELF, PetscMemmove(bdest, bsrc, bsize);
115:   return;
116: }
117: #endif

119: /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
120:  x. Output also inclusive.

122:  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:

124:    {0,1,2,3,4,5}

126:    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
127:   */
128: static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
129: {
130:   PetscInt last = l, k = 1, mid, cur = l + 1;

132:   *m = l;
133:   PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft", r, l);
134:   if ((*cmp)(x, arr + r * size, ctx) >= 0) {
135:     *m = r;
136:     return 0;
137:   }
138:   if ((*cmp)(x, (arr) + l * size, ctx) < 0 || PetscUnlikely(!(r - l))) return 0;
139:   while (PETSC_TRUE) {
140:     if (PetscUnlikely(cur > r)) {
141:       cur = r;
142:       break;
143:     }
144:     if ((*cmp)(x, (arr) + cur * size, ctx) < 0) break;
145:     last = cur;
146:     cur += (k <<= 1) + 1;
147:     ++k;
148:   }
149:   /* standard binary search but take last 0 mid 0 cur 1 into account*/
150:   while (cur > last + 1) {
151:     mid = last + ((cur - last) >> 1);
152:     if ((*cmp)(x, (arr) + mid * size, ctx) < 0) {
153:       cur = mid;
154:     } else {
155:       last = mid;
156:     }
157:   }
158:   *m = cur;
159:   return 0;
160: }

162: /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
163:  < x. Output also inclusive */
164: static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
165: {
166:   PetscInt last = r, k = 1, mid, cur = r - 1;

168:   *m = r;
169:   PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight", r, l);
170:   if ((*cmp)(x, arr + l * size, ctx) <= 0) {
171:     *m = l;
172:     return 0;
173:   }
174:   if ((*cmp)(x, (arr) + r * size, ctx) > 0 || PetscUnlikely(!(r - l))) return 0;
175:   while (PETSC_TRUE) {
176:     if (PetscUnlikely(cur < l)) {
177:       cur = l;
178:       break;
179:     }
180:     if ((*cmp)(x, (arr) + cur * size, ctx) > 0) break;
181:     last = cur;
182:     cur -= (k <<= 1) + 1;
183:     ++k;
184:   }
185:   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
186:   while (last > cur + 1) {
187:     mid = last - ((last - cur) >> 1);
188:     if ((*cmp)(x, (arr) + mid * size, ctx) > 0) {
189:       cur = mid;
190:     } else {
191:       last = mid;
192:     }
193:   }
194:   *m = cur;
195:   return 0;
196: }

198: /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
199:  complete array, left is first index of left array, mid is first index of right array, right is last index of right
200:  array */
201: static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
202: {
203:   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
204:   const PetscInt llen = mid - left;

206:   Petsc_memcpy(tarr, arr + (left * size), llen * size);
207:   while ((i < llen) && (j <= right)) {
208:     if ((*cmp)(tarr + (i * size), arr + (j * size), ctx) < 0) {
209:       Petsc_memcpy(arr + (k * size), tarr + (i * size), size);
210:       ++k;
211:       gallopright = 0;
212:       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
213:         PetscInt l1, l2, diff1, diff2;
214:         do {
215:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
216:           /* search temp for right[j], can move up to that of temp into arr immediately */
217:           PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1);
218:           diff1 = l1 - i;
219:           Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size);
220:           k += diff1;
221:           i = l1;
222:           /* search right for temp[i], can move up to that many of right into arr */
223:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2);
224:           diff2 = l2 - j;
225:           Petsc_memmove((arr) + k * size, (arr) + j * size, diff2 * size);
226:           k += diff2;
227:           j = l2;
228:           if (i >= llen || j > right) break;
229:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
230:         ++MIN_GALLOP_GLOBAL;
231:       }
232:     } else {
233:       Petsc_memmove(arr + (k * size), arr + (j * size), size);
234:       ++k;
235:       gallopleft = 0;
236:       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
237:         PetscInt l1, l2, diff1, diff2;
238:         do {
239:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
240:           /* search right for temp[i], can move up to that many of right into arr */
241:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2);
242:           diff2 = l2 - j;
243:           Petsc_memmove(arr + (k * size), arr + (j * size), diff2 * size);
244:           k += diff2;
245:           j = l2;
246:           /* search temp for right[j], can copy up to that of temp into arr immediately */
247:           PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1);
248:           diff1 = l1 - i;
249:           Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size);
250:           k += diff1;
251:           i = l1;
252:           if (i >= llen || j > right) break;
253:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
254:         ++MIN_GALLOP_GLOBAL;
255:       }
256:     }
257:   }
258:   if (i < llen) Petsc_memcpy(arr + (k * size), tarr + (i * size), (llen - i) * size);
259:   return 0;
260: }

262: static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
263: {
264:   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
265:   const PetscInt llen = mid - left;

267:   Petsc_memcpy2(atarr, arr + (left * asize), llen * asize, btarr, barr + (left * bsize), llen * bsize);
268:   while ((i < llen) && (j <= right)) {
269:     if ((*cmp)(atarr + (i * asize), arr + (j * asize), ctx) < 0) {
270:       Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), asize, barr + (k * bsize), btarr + (i * bsize), bsize);
271:       ++k;
272:       gallopright = 0;
273:       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
274:         PetscInt l1, l2, diff1, diff2;
275:         do {
276:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
277:           /* search temp for right[j], can move up to that of temp into arr immediately */
278:           PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1);
279:           diff1 = l1 - i;
280:           Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize);
281:           k += diff1;
282:           i = l1;
283:           /* search right for temp[i], can move up to that many of right into arr */
284:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2);
285:           diff2 = l2 - j;
286:           Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize);
287:           k += diff2;
288:           j = l2;
289:           if (i >= llen || j > right) break;
290:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
291:         ++MIN_GALLOP_GLOBAL;
292:       }
293:     } else {
294:       Petsc_memmove2(arr + (k * asize), arr + (j * asize), asize, barr + (k * bsize), barr + (j * bsize), bsize);
295:       ++k;
296:       gallopleft = 0;
297:       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
298:         PetscInt l1, l2, diff1, diff2;
299:         do {
300:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
301:           /* search right for temp[i], can move up to that many of right into arr */
302:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2);
303:           diff2 = l2 - j;
304:           Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize);
305:           k += diff2;
306:           j = l2;
307:           /* search temp for right[j], can copy up to that of temp into arr immediately */
308:           PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1);
309:           diff1 = l1 - i;
310:           Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize);
311:           k += diff1;
312:           i = l1;
313:           if (i >= llen || j > right) break;
314:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
315:         ++MIN_GALLOP_GLOBAL;
316:       }
317:     }
318:   }
319:   if (i < llen) Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), (llen - i) * asize, barr + (k * bsize), btarr + (i * bsize), (llen - i) * bsize);
320:   return 0;
321: }

323: /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
324:  complete array, left is first index of left array, mid is first index of right array, right is last index of right
325:  array */
326: static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
327: {
328:   PetscInt       i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0;
329:   const PetscInt rlen = right - mid + 1;

331:   Petsc_memcpy(tarr, (arr) + mid * size, rlen * size);
332:   while ((i >= 0) && (j >= left)) {
333:     if ((*cmp)((tarr) + i * size, (arr) + j * size, ctx) > 0) {
334:       Petsc_memcpy((arr) + k * size, (tarr) + i * size, size);
335:       --k;
336:       gallopleft = 0;
337:       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
338:         PetscInt l1, l2, diff1, diff2;
339:         do {
340:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
341:           /* search temp for left[j], can copy up to that many of temp into arr */
342:           PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1);
343:           diff1 = i - l1;
344:           Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size);
345:           k -= diff1;
346:           i = l1;
347:           /* search left for temp[i], can move up to that many of left up arr */
348:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2);
349:           diff2 = j - l2;
350:           Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size);
351:           k -= diff2;
352:           j = l2;
353:           if (i < 0 || j < left) break;
354:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
355:         ++MIN_GALLOP_GLOBAL;
356:       }
357:     } else {
358:       Petsc_memmove((arr) + k * size, (arr) + j * size, size);
359:       --k;
360:       gallopright = 0;
361:       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
362:         PetscInt l1, l2, diff1, diff2;
363:         do {
364:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
365:           /* search left for temp[i], can move up to that many of left up arr */
366:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2);
367:           diff2 = j - l2;
368:           Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size);
369:           k -= diff2;
370:           j = l2;
371:           /* search temp for left[j], can copy up to that many of temp into arr */
372:           PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1);
373:           diff1 = i - l1;
374:           Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size);
375:           k -= diff1;
376:           i = l1;
377:           if (i < 0 || j < left) break;
378:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
379:         ++MIN_GALLOP_GLOBAL;
380:       }
381:     }
382:   }
383:   if (i >= 0) Petsc_memcpy((arr) + left * size, tarr, (i + 1) * size);
384:   return 0;
385: }

387: static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
388: {
389:   PetscInt       i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0;
390:   const PetscInt rlen = right - mid + 1;

392:   Petsc_memcpy2(atarr, (arr) + mid * asize, rlen * asize, btarr, (barr) + mid * bsize, rlen * bsize);
393:   while ((i >= 0) && (j >= left)) {
394:     if ((*cmp)((atarr) + i * asize, (arr) + j * asize, ctx) > 0) {
395:       Petsc_memcpy2((arr) + k * asize, (atarr) + i * asize, asize, (barr) + k * bsize, (btarr) + i * bsize, bsize);
396:       --k;
397:       gallopleft = 0;
398:       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
399:         PetscInt l1, l2, diff1, diff2;
400:         do {
401:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
402:           /* search temp for left[j], can copy up to that many of temp into arr */
403:           PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1);
404:           diff1 = i - l1;
405:           Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize);
406:           k -= diff1;
407:           i = l1;
408:           /* search left for temp[i], can move up to that many of left up arr */
409:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2);
410:           diff2 = j - l2;
411:           Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize);
412:           k -= diff2;
413:           j = l2;
414:           if (i < 0 || j < left) break;
415:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
416:         ++MIN_GALLOP_GLOBAL;
417:       }
418:     } else {
419:       Petsc_memmove2((arr) + k * asize, (arr) + j * asize, asize, (barr) + k * bsize, (barr) + j * bsize, bsize);
420:       --k;
421:       gallopright = 0;
422:       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
423:         PetscInt l1, l2, diff1, diff2;
424:         do {
425:           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
426:           /* search left for temp[i], can move up to that many of left up arr */
427:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2);
428:           diff2 = j - l2;
429:           Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize);
430:           k -= diff2;
431:           j = l2;
432:           /* search temp for left[j], can copy up to that many of temp into arr */
433:           PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1);
434:           diff1 = i - l1;
435:           Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize);
436:           k -= diff1;
437:           i = l1;
438:           if (i < 0 || j < left) break;
439:         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
440:         ++MIN_GALLOP_GLOBAL;
441:       }
442:     }
443:   }
444:   if (i >= 0) Petsc_memcpy2((arr) + left * asize, atarr, (i + 1) * asize, (barr) + left * bsize, btarr, (i + 1) * bsize);
445:   return 0;
446: }

448: /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
449:  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
450: static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
451: {
452:   PetscInt i = start == left ? start + 1 : start;

454:   for (; i <= right; ++i) {
455:     PetscInt j = i - 1;
456:     Petsc_memcpy(tarr, arr + (i * size), size);
457:     while ((j >= left) && ((*cmp)(tarr, (arr) + j * size, ctx) < 0)) {
458:       COPYSWAPPY(arr + (j + 1) * size, arr + j * size, tarr + size, size);
459:       --j;
460:     }
461:     Petsc_memcpy((arr) + (j + 1) * size, tarr, size);
462:   }
463:   return 0;
464: }

466: static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
467: {
468:   PetscInt i = start == left ? start + 1 : start;

470:   for (; i <= right; ++i) {
471:     PetscInt j = i - 1;
472:     Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize);
473:     while ((j >= left) && ((*cmp)(atarr, arr + (j * asize), ctx) < 0)) {
474:       COPYSWAPPY2(arr + (j + 1) * asize, arr + j * asize, asize, barr + (j + 1) * bsize, barr + j * bsize, bsize, atarr + asize);
475:       --j;
476:     }
477:     Petsc_memcpy2(arr + (j + 1) * asize, atarr, asize, barr + (j + 1) * bsize, btarr, bsize);
478:   }
479:   return 0;
480: }

482: /* See PetscInsertionSort_Private */
483: static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
484: {
485:   PetscInt i = start == left ? start + 1 : start;

487:   for (; i <= right; ++i) {
488:     PetscInt l = left, r = i;
489:     Petsc_memcpy(tarr, arr + (i * size), size);
490:     do {
491:       const PetscInt m = l + ((r - l) >> 1);
492:       if ((*cmp)(tarr, arr + (m * size), ctx) < 0) {
493:         r = m;
494:       } else {
495:         l = m + 1;
496:       }
497:     } while (l < r);
498:     Petsc_memmove(arr + ((l + 1) * size), arr + (l * size), (i - l) * size);
499:     Petsc_memcpy(arr + (l * size), tarr, size);
500:   }
501:   return 0;
502: }

504: static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
505: {
506:   PetscInt i = start == left ? start + 1 : start;

508:   for (; i <= right; ++i) {
509:     PetscInt l = left, r = i;
510:     Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize);
511:     do {
512:       const PetscInt m = l + ((r - l) >> 1);
513:       if ((*cmp)(atarr, arr + (m * asize), ctx) < 0) {
514:         r = m;
515:       } else {
516:         l = m + 1;
517:       }
518:     } while (l < r);
519:     Petsc_memmove2(arr + ((l + 1) * asize), arr + (l * asize), (i - l) * asize, barr + ((l + 1) * bsize), barr + (l * bsize), (i - l) * bsize);
520:     Petsc_memcpy2(arr + (l * asize), atarr, asize, barr + (l * bsize), btarr, bsize);
521:   }
522:   return 0;
523: }

525: typedef struct {
526:   PetscInt size;
527:   PetscInt start;
528: #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
529: } PetscTimSortStack;
530: #else
531: } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2 * sizeof(PetscInt));
532: #endif

534: typedef struct {
535:   char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
536:   size_t    size;
537:   size_t    maxsize;
538: } PetscTimSortBuffer;

540: static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
541: {
542:   if (PetscLikely(newSize <= buff->size)) return 0;
543:   {
544:     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
545:     size_t newMax = PetscMin(newSize * newSize, buff->maxsize);
546:     PetscFree(buff->ptr);
547:     PetscMalloc1(newMax, &buff->ptr);
548:     buff->size = newMax;
549:   }
550:   return 0;
551: }

553: static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
554: {
555:   for (; stacksize; --stacksize) {
556:     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
557:     if ((*cmp)(arr + (stack[stacksize].start - 1) * size, arr + (stack[stacksize].start) * size, ctx) > 0) {
558:       PetscInt l, m = stack[stacksize].start, r;
559:       /* Search A for B[0] insertion */
560:       PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * size, &l);
561:       /* Search B for A[-1] insertion */
562:       PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * size, &r);
563:       if (m - l <= r - m) {
564:         PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size);
565:         PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
566:       } else {
567:         PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size);
568:         PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
569:       }
570:     }
571:     /* Update A with merge */
572:     stack[stacksize - 1].size += stack[stacksize].size;
573:   }
574:   return 0;
575: }

577: static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
578: {
579:   for (; stacksize; --stacksize) {
580:     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
581:     if ((*cmp)(arr + (stack[stacksize].start - 1) * asize, arr + (stack[stacksize].start) * asize, ctx) > 0) {
582:       PetscInt l, m = stack[stacksize].start, r;
583:       /* Search A for B[0] insertion */
584:       PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * asize, &l);
585:       /* Search B for A[-1] insertion */
586:       PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * asize, &r);
587:       if (m - l <= r - m) {
588:         PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize);
589:         PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize);
590:         PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
591:       } else {
592:         PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize);
593:         PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize);
594:         PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
595:       }
596:     }
597:     /* Update A with merge */
598:     stack[stacksize - 1].size += stack[stacksize].size;
599:   }
600:   return 0;
601: }

603: static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
604: {
605:   PetscInt i = *stacksize;

607:   while (i) {
608:     PetscInt l, m, r, itemp = i;

610:     if (i == 1) {
611:       /* A = stack[i-1], B = stack[i] */
612:       if (stack[i - 1].size < stack[i].size) {
613:         /* if A[-1] <= B[0] then sorted */
614:         if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) {
615:           m = stack[i].start;
616:           /* Search A for B[0] insertion */
617:           PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * size, &l);
618:           /* Search B for A[-1] insertion */
619:           PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * size, &r);
620:           if (m - l <= r - m) {
621:             PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size);
622:             PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
623:           } else {
624:             PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size);
625:             PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
626:           }
627:         }
628:         /* Update A with merge */
629:         stack[i - 1].size += stack[i].size;
630:         --i;
631:       }
632:     } else {
633:       /* i > 2, i.e. C exists
634:        A = stack[i-2], B = stack[i-1], C = stack[i]; */
635:       if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) {
636:         if (stack[i - 2].size < stack[i].size) {
637:           /* merge B into A, but if A[-1] <= B[0] then already sorted */
638:           if ((*cmp)(arr + (stack[i - 1].start - 1) * size, arr + (stack[i - 1].start) * size, ctx) > 0) {
639:             m = stack[i - 1].start;
640:             /* Search A for B[0] insertion */
641:             PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * size, &l);
642:             /* Search B for A[-1] insertion */
643:             PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * size, &r);
644:             if (m - l <= r - m) {
645:               PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size);
646:               PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
647:             } else {
648:               PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size);
649:               PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
650:             }
651:           }
652:           /* Update A with merge */
653:           stack[i - 2].size += stack[i - 1].size;
654:           /* Push C up the stack */
655:           stack[i - 1].start = stack[i].start;
656:           stack[i - 1].size  = stack[i].size;
657:         } else {
658:         /* merge C into B */
659:         mergeBC:
660:           /* If B[-1] <= C[0] then... you know the drill */
661:           if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) {
662:             m = stack[i].start;
663:             /* Search B for C[0] insertion */
664:             PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * size, &l);
665:             /* Search C for B[-1] insertion */
666:             PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * size, &r);
667:             if (m - l <= r - m) {
668:               PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size);
669:               PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
670:             } else {
671:               PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size);
672:               PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);
673:             }
674:           }
675:           /* Update B with merge */
676:           stack[i - 1].size += stack[i].size;
677:         }
678:         --i;
679:       } else if (stack[i - 1].size <= stack[i].size) {
680:         /* merge C into B */
681:         goto mergeBC;
682:       }
683:     }
684:     if (itemp == i) break;
685:   }
686:   *stacksize = i;
687:   return 0;
688: }

690: static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
691: {
692:   PetscInt i = *stacksize;

694:   while (i) {
695:     PetscInt l, m, r, itemp = i;

697:     if (i == 1) {
698:       /* A = stack[i-1], B = stack[i] */
699:       if (stack[i - 1].size < stack[i].size) {
700:         /* if A[-1] <= B[0] then sorted */
701:         if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) {
702:           m = stack[i].start;
703:           /* Search A for B[0] insertion */
704:           PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * asize, &l);
705:           /* Search B for A[-1] insertion */
706:           PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * asize, &r);
707:           if (m - l <= r - m) {
708:             PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize);
709:             PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize);
710:             PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
711:           } else {
712:             PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize);
713:             PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize);
714:             PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
715:           }
716:         }
717:         /* Update A with merge */
718:         stack[i - 1].size += stack[i].size;
719:         --i;
720:       }
721:     } else {
722:       /* i > 2, i.e. C exists
723:        A = stack[i-2], B = stack[i-1], C = stack[i]; */
724:       if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) {
725:         if (stack[i - 2].size < stack[i].size) {
726:           /* merge B into A, but if A[-1] <= B[0] then already sorted */
727:           if ((*cmp)(arr + (stack[i - 1].start - 1) * asize, arr + (stack[i - 1].start) * asize, ctx) > 0) {
728:             m = stack[i - 1].start;
729:             /* Search A for B[0] insertion */
730:             PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * asize, &l);
731:             /* Search B for A[-1] insertion */
732:             PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * asize, &r);
733:             if (m - l <= r - m) {
734:               PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize);
735:               PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize);
736:               PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
737:             } else {
738:               PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize);
739:               PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize);
740:               PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
741:             }
742:           }
743:           /* Update A with merge */
744:           stack[i - 2].size += stack[i - 1].size;
745:           /* Push C up the stack */
746:           stack[i - 1].start = stack[i].start;
747:           stack[i - 1].size  = stack[i].size;
748:         } else {
749:         /* merge C into B */
750:         mergeBC:
751:           /* If B[-1] <= C[0] then... you know the drill */
752:           if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) {
753:             m = stack[i].start;
754:             /* Search B for C[0] insertion */
755:             PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * asize, &l);
756:             /* Search C for B[-1] insertion */
757:             PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * asize, &r);
758:             if (m - l <= r - m) {
759:               PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize);
760:               PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize);
761:               PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
762:             } else {
763:               PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize);
764:               PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize);
765:               PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);
766:             }
767:           }
768:           /* Update B with merge */
769:           stack[i - 1].size += stack[i].size;
770:         }
771:         --i;
772:       } else if (stack[i - 1].size <= stack[i].size) {
773:         /* merge C into B */
774:         goto mergeBC;
775:       }
776:     }
777:     if (itemp == i) break;
778:   }
779:   *stacksize = i;
780:   return 0;
781: }

783: /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
784:  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
785:  binary insertion sort or regulat insertion sort */
786: static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
787: {
788:   const PetscInt re = PetscMin(runstart + minrun, n - 1);
789:   PetscInt       ri = runstart;

791:   if (PetscUnlikely(runstart == n - 1)) {
792:     *runend = runstart;
793:     return 0;
794:   }
795:   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
796:   if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) {
797:     ++ri;
798:     while (ri < n - 1) {
799:       if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) >= 0) break;
800:       ++ri;
801:     }
802:     {
803:       PetscInt lo = runstart, hi = ri;
804:       for (; lo < hi; ++lo, --hi) COPYSWAPPY(arr + lo * size, arr + hi * size, tarr, size);
805:     }
806:   } else {
807:     ++ri;
808:     while (ri < n - 1) {
809:       if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) break;
810:       ++ri;
811:     }
812:   }
813:   if (ri < re) {
814:     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
815:      binary search */
816:     if (ri - runstart <= minrun >> 1) {
817:       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
818:       PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
819:     } else {
820:       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
821:     }
822:     *runend = re;
823:   } else *runend = ri;
824:   return 0;
825: }

827: static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
828: {
829:   const PetscInt re = PetscMin(runstart + minrun, n - 1);
830:   PetscInt       ri = runstart;

832:   if (PetscUnlikely(runstart == n - 1)) {
833:     *runend = runstart;
834:     return 0;
835:   }
836:   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
837:   if ((*cmp)((arr) + (ri + 1) * asize, arr + (ri * asize), ctx) < 0) {
838:     ++ri;
839:     while (ri < n - 1) {
840:       if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) >= 0) break;
841:       ++ri;
842:     }
843:     {
844:       PetscInt lo = runstart, hi = ri;
845:       for (; lo < hi; ++lo, --hi) COPYSWAPPY2(arr + lo * asize, arr + hi * asize, asize, barr + lo * bsize, barr + hi * bsize, bsize, atarr);
846:     }
847:   } else {
848:     ++ri;
849:     while (ri < n - 1) {
850:       if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) < 0) break;
851:       ++ri;
852:     }
853:   }
854:   if (ri < re) {
855:     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
856:      binary search */
857:     if (ri - runstart <= minrun >> 1) {
858:       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
859:       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
860:     } else {
861:       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
862:     }
863:     *runend = re;
864:   } else *runend = ri;
865:   return 0;
866: }

868: /*@C
869:   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.

871:   Not Collective

873:   Input Parameters:
874: + n    - number of values
875: . arr  - array to be sorted
876: . size - size in bytes of the datatype held in arr
877: . cmp  - function pointer to comparison function
878: - ctx  - optional context to be passed to comparison function, NULL if not needed

880:   Output Parameters:
881: . arr  - sorted array

883:   Notes:
884:   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
885:  sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
886:  select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
887:  do so it repeatedly triggers attempts throughout to merge adjacent runs.

889:   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
890:   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
891:   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
892:   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
893:   likely all contain similar data.

895:   Sample usage:
896:   The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference
897:   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
898:   may also
899:  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
900:  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
901:   order

903: .vb
904:   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
905:     my_type l = *(my_type *) left, r = *(my_type *) right;
906:     return (l < r) ? -1 : (l > r);
907:   }
908: .ve
909:   Note the context is unused here but you may use it to pass and subsequently access whatever information required
910:   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
911:   Then pass the function
912: .vb
913:   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
914: .ve

916:   Fortran Notes:
917:   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
918:   returns result. For example
919: .vb
920:  subroutine CompareIntegers(left,right,ctx,result)
921:    implicit none

923:    PetscInt,intent(in) :: left, right
924:    type(UserCtx)       :: ctx
925:    integer,intent(out) :: result

927:    if (left < right) then
928:      result = -1
929:    else if (left == right) then
930:      result = 0
931:    else
932:      result = 1
933:    end if
934:    return
935:  end subroutine CompareIntegers
936: .ve

938:   References:
939: . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt

941:   Level: developer

943: .seealso: `PetscTimSortWithArray()`, `PetscIntSortSemiOrdered()`, `PetscRealSortSemiOrdered()`, `PetscMPIIntSortSemiOrdered()`
944: @*/
945: PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
946: {
947:   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
948:   PetscTimSortStack  runstack[128];
949:   PetscTimSortBuffer buff;
950:   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
951:    It is so unlikely that this limit is reached that this is __never__ checked for */

953:   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
954:    is a power of 2 or one plus a power of 2 */
955:   {
956:     PetscInt t = n, r = 0;
957:     /* r becomes 1 if the least significant bits contain at least one off bit */
958:     while (t >= 64) {
959:       r |= t & 1;
960:       t >>= 1;
961:     }
962:     minrun = t + r;
963:   }
964:   if (PetscDefined(USE_DEBUG)) {
965:     PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun);
966:     if (n < 64) {
967:       PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n);
969:   }
970:   PetscMalloc1((size_t)minrun * size, &buff.ptr);
971:   buff.size         = (size_t)minrun * size;
972:   buff.maxsize      = (size_t)n * size;
973:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
974:   while (runstart < n) {
975:     /* Check if additional entries are at least partially ordered and build natural run */
976:     PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);
977:     runstack[stacksize].start = runstart;
978:     runstack[stacksize].size  = runend - runstart + 1;
979:     PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);
980:     ++stacksize;
981:     runstart = runend + 1;
982:   }
983:   /* Have been inside while, so discard last stacksize++ */
984:   --stacksize;
985:   PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);
986:   PetscFree(buff.ptr);
987:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
988:   return 0;
989: }

991: /*@C
992:   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
993:   reorders a second array to match the first. The arrays need not be the same type.

995:   Not Collective

997:   Input Parameters:
998: + n     - number of values
999: . asize - size in bytes of the datatype held in arr
1000: . bsize - size in bytes of the datatype held in barr
1001: . cmp   - function pointer to comparison function
1002: - ctx   - optional context to be passed to comparison function, NULL if not needed

1004:   Input/Output Parameters:
1005: + arr  - array to be sorted, on output it is sorted
1006: - barr - array to be reordered, on output it is reordered

1008:   Notes:
1009:   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1010:   overlap.

1012:   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1013:   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1014:  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1015:  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.

1017:   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1018:   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1019:   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1020:   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1021:   likely all contain similar data.

1023:   Sample usage:
1024:   The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference
1025:   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1026:   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1027:   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1028:   increasing order

1030: .vb
1031:   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1032:     my_type l = *(my_type *) left, r = *(my_type *) right;
1033:     return (l < r) ? -1 : (l > r);
1034:   }
1035: .ve
1036:   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1037:   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1038:   Then pass the function
1039: .vb
1040:   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1041: .ve

1043:   Fortran Notes:
1044:   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1045:   returns result. For example
1046: .vb
1047:  subroutine CompareIntegers(left,right,ctx,result)
1048:    implicit none

1050:    PetscInt,intent(in) :: left, right
1051:    type(UserCtx)       :: ctx
1052:    integer,intent(out) :: result

1054:    if (left < right) then
1055:      result = -1
1056:    else if (left == right) then
1057:      result = 0
1058:    else
1059:      result = 1
1060:    end if
1061:    return
1062:  end subroutine CompareIntegers
1063: .ve

1065:   References:
1066: . * - Tim Peters. https://bugs.python.org/file4451/timsort.txt

1068:   Level: developer

1070: .seealso: `PetscTimSort()`, `PetscIntSortSemiOrderedWithArray()`, `PetscRealSortSemiOrderedWithArrayInt()`, `PetscMPIIntSortSemiOrderedWithArray()`
1071: @*/
1072: PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1073: {
1074:   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1075:   PetscTimSortStack  runstack[128];
1076:   PetscTimSortBuffer abuff, bbuff;
1077:   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1078:    It is so unlikely that this limit is reached that this is __never__ checked for */

1080:   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1081:    is a power of 2 or one plus a power of 2 */
1082:   {
1083:     PetscInt t = n, r = 0;
1084:     /* r becomes 1 if the least significant bits contain at least one off bit */
1085:     while (t >= 64) {
1086:       r |= t & 1;
1087:       t >>= 1;
1088:     }
1089:     minrun = t + r;
1090:   }
1091:   if (PetscDefined(USE_DEBUG)) {
1092:     PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun);
1094:   }
1095:   PetscMalloc1((size_t)minrun * asize, &abuff.ptr);
1096:   abuff.size    = (size_t)minrun * asize;
1097:   abuff.maxsize = (size_t)n * asize;
1098:   PetscMalloc1((size_t)minrun * bsize, &bbuff.ptr);
1099:   bbuff.size        = (size_t)minrun * bsize;
1100:   bbuff.maxsize     = (size_t)n * bsize;
1101:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1102:   while (runstart < n) {
1103:     /* Check if additional entries are at least partially ordered and build natural run */
1104:     PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);
1105:     runstack[stacksize].start = runstart;
1106:     runstack[stacksize].size  = runend - runstart + 1;
1107:     PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);
1108:     ++stacksize;
1109:     runstart = runend + 1;
1110:   }
1111:   /* Have been inside while, so discard last stacksize++ */
1112:   --stacksize;
1113:   PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);
1114:   PetscFree(abuff.ptr);
1115:   PetscFree(bbuff.ptr);
1116:   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1117:   return 0;
1118: }

1120: /*@
1121:    PetscIntSortSemiOrdered - Sorts an array of `PetscInt` in place in increasing order.

1123:    Not Collective

1125:    Input Parameters:
1126: +  n   - number of values
1127: -  arr - array of integers

1129:    Output Parameters:
1130: .  arr - sorted array of integers

1132:    Notes:
1133:    If the array is less than 64 entries long `PetscSortInt()` is automatically used.

1135:    This function serves as an alternative to `PetscSortInt()`. While this function works for any array of integers it is
1136:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1137:    recommended that the user benchmark their code to see which routine is fastest.

1139:    Level: intermediate

1141: .seealso: `PetscTimSort()`, `PetscSortInt()`, `PetscSortIntWithPermutation()`
1142: @*/
1143: PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1144: {
1145:   if (n <= 1) return 0;
1147:   if (n < 64) {
1148:     PetscSortInt(n, arr);
1149:   } else {
1150:     PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1151:   }
1152:   return 0;
1153: }

1155: /*@
1156:    PetscIntSortSemiOrderedWithArray - Sorts an array of `PetscInt` in place in increasing order and reorders a second
1157:    `PetscInt` array to match the first.

1159:    Not Collective

1161:    Input Parameter:
1162: .  n   - number of values

1164:    Input/Output Parameters:
1165: +  arr1 - array of integers to be sorted, modified on output
1166: -  arr2 - array of integers to be reordered, modified on output

1168:    Notes:
1169:    The arrays CANNOT overlap.

1171:    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1172:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1173:    recommended that the user benchmark their code to see which routine is fastest.

1175:    Level: intermediate

1177: .seealso: `PetscTimSortWithArray()`, `PetscSortIntWithArray()`, `PetscSortIntWithPermutation()`
1178: @*/
1179: PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1180: {
1181:   if (n <= 1) return 0;
1184:   /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1185:   PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);
1186:   return 0;
1187: }

1189: /*@
1190:    PetscMPIIntSortSemiOrdered - Sorts an array of `PetscMPIInt` in place in increasing order.

1192:    Not Collective

1194:    Input Parameters:
1195: +  n   - number of values
1196: -  arr - array of `PetscMPIInt`

1198:    Output Parameters:
1199: .  arr - sorted array of integers

1201:    Notes:
1202:    If the array is less than 64 entries long `PetscSortMPIInt()` is automatically used.

1204:    This function serves as an alternative to `PetscSortMPIInt()`. While this function works for any array of `PetscMPIInt` it is
1205:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1206:    recommended that the user benchmark their code to see which routine is fastest.

1208:    Level: intermediate

1210: .seealso: `PetscTimSort()`, `PetscSortMPIInt()`
1211: @*/
1212: PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1213: {
1214:   if (n <= 1) return 0;
1216:   if (n < 64) {
1217:     PetscSortMPIInt(n, arr);
1218:   } else {
1219:     PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1220:   }
1221:   return 0;
1222: }

1224: /*@
1225:    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of `PetscMPIInt` in place in increasing order and reorders a second `PetscMPIInt`
1226:    array to match the first.

1228:    Not Collective

1230:    Input Parameter:
1231: .  n   - number of values

1233:    Input/Output Parameters:
1234: .  arr1 - array of integers to be sorted, modified on output
1235: -  arr2 - array of integers to be reordered, modified on output

1237:    Notes:
1238:    The arrays CANNOT overlap.

1240:    This function serves as an alternative to `PetscSortMPIIntWithArray()`. While this function works for any array of integers it is
1241:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1242:    recommended that the user benchmark their code to see which routine is fastest.

1244:    Level: intermediate

1246: .seealso: `PetscTimSortWithArray()`, `PetscSortMPIIntWithArray()`, `PetscSortMPIIntWithPermutation()`
1247: @*/
1248: PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1249: {
1250:   if (n <= 1) return 0;
1253:   /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1254:   PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);
1255:   return 0;
1256: }

1258: /*@
1259:    PetscRealSortSemiOrdered - Sorts an array of `PetscReal` in place in increasing order.

1261:    Not Collective

1263:    Input Parameters:
1264: +  n   - number of values
1265: -  arr - array of `PetscReal`

1267:    Output Parameters:
1268: .  arr - sorted array of integers

1270:    Notes:
1271:    If the array is less than 64 entries long `PetscSortReal()` is automatically used.

1273:    This function serves as an alternative to `PetscSortReal()`. While this function works for any array of `PetscReal` it is
1274:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1275:    recommended that the user benchmark their code to see which routine is fastest.

1277:    Level: intermediate

1279: .seealso: `PetscTimSort()`, `PetscSortReal()`, `PetscSortRealWithPermutation()`
1280: @*/
1281: PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1282: {
1283:   if (n <= 1) return 0;
1285:   if (n < 64) {
1286:     PetscSortReal(n, arr);
1287:   } else {
1288:     PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);
1289:   }
1290:   return 0;
1291: }

1293: /*@
1294:    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of `PetscReal` in place in increasing order and reorders a second
1295:    array of `PetscInt` to match the first.

1297:    Not Collective

1299:    Input Parameter:
1300: .  n   - number of values

1302:    Input/Output Parameters:
1303: .  arr1 - array of `PetscReal` to be sorted, modified on output
1304: -  arr2 - array of `PetscInt` to be reordered, modified on output

1306:    Notes:
1307:    This function serves as an alternative to `PetscSortRealWithArray()`. While this function works for any array of `PetscReal` it is
1308:    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1309:    recommended that the user benchmark their code to see which routine is fastest.

1311:    Level: intermediate

1313: .seealso: `PetscTimSortWithArray()`, `PetscSortRealWithArrayInt()`, `PetscSortRealWithPermutation()`
1314: @*/
1315: PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1316: {
1317:   if (n <= 1) return 0;
1320:   /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1321:   PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);
1322:   return 0;
1323: }