Actual source code: matstash.c
2: #include <petsc/private/matimpl.h>
4: #define DEFAULT_STASH_SIZE 10000
6: static PetscErrorCode MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
7: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
8: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
9: #if !defined(PETSC_HAVE_MPIUNI)
10: static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
11: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
12: static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
13: #endif
15: /*
16: MatStashCreate_Private - Creates a stash, currently used by all the parallel
17: matrix implementations. The stash is where elements of a matrix destined
18: to be stored on other processors are kept until matrix assembly is done.
20: This is a simple-minded stash that simply appends entries to the end of the stash.
22: Input Parameters:
23: comm - communicator, required for scatters.
24: bs - stash block size, used when stashing blocks of values
26: Output Parameters:
27: stash - the newly created stash
28: */
29: PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
30: {
31: PetscInt max, *opt, nopt, i;
32: PetscBool flg;
34: /* Require 2 tags; get the second using PetscCommGetNewTag() */
35: stash->comm = comm;
37: PetscCommGetNewTag(stash->comm, &stash->tag1);
38: PetscCommGetNewTag(stash->comm, &stash->tag2);
39: MPI_Comm_size(stash->comm, &stash->size);
40: MPI_Comm_rank(stash->comm, &stash->rank);
41: PetscMalloc1(2 * stash->size, &stash->flg_v);
42: for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
44: nopt = stash->size;
45: PetscMalloc1(nopt, &opt);
46: PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg);
47: if (flg) {
48: if (nopt == 1) max = opt[0];
49: else if (nopt == stash->size) max = opt[stash->rank];
50: else if (stash->rank < nopt) max = opt[stash->rank];
51: else max = 0; /* Use default */
52: stash->umax = max;
53: } else {
54: stash->umax = 0;
55: }
56: PetscFree(opt);
57: if (bs <= 0) bs = 1;
59: stash->bs = bs;
60: stash->nmax = 0;
61: stash->oldnmax = 0;
62: stash->n = 0;
63: stash->reallocs = -1;
64: stash->space_head = NULL;
65: stash->space = NULL;
67: stash->send_waits = NULL;
68: stash->recv_waits = NULL;
69: stash->send_status = NULL;
70: stash->nsends = 0;
71: stash->nrecvs = 0;
72: stash->svalues = NULL;
73: stash->rvalues = NULL;
74: stash->rindices = NULL;
75: stash->nprocessed = 0;
76: stash->reproduce = PETSC_FALSE;
77: stash->blocktype = MPI_DATATYPE_NULL;
79: PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL);
80: #if !defined(PETSC_HAVE_MPIUNI)
81: flg = PETSC_FALSE;
82: PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL);
83: if (!flg) {
84: stash->ScatterBegin = MatStashScatterBegin_BTS;
85: stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
86: stash->ScatterEnd = MatStashScatterEnd_BTS;
87: stash->ScatterDestroy = MatStashScatterDestroy_BTS;
88: } else {
89: #endif
90: stash->ScatterBegin = MatStashScatterBegin_Ref;
91: stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
92: stash->ScatterEnd = MatStashScatterEnd_Ref;
93: stash->ScatterDestroy = NULL;
94: #if !defined(PETSC_HAVE_MPIUNI)
95: }
96: #endif
97: return 0;
98: }
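/*
   Editorial sketch (not part of matstash.c): how a parallel matrix type
   typically creates its stashes while the matrix is being set up. The
   surrounding context is assumed; mat->stash and mat->bstash are the scalar
   and block stashes kept in the Mat structure by the MPI formats.

     Mat      mat;          // assumed to be a partially constructed parallel matrix
     PetscInt bs = 2;       // bs > 1 only matters for blocked formats such as MATMPIBAIJ

     MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash);
     MatStashCreate_Private(PetscObjectComm((PetscObject)mat), bs, &mat->bstash);

   The behavior can then be tuned at run time with -matstash_initial_size,
   -matstash_legacy (select the reference scatter), and -matstash_reproduce.
*/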
100: /*
101: MatStashDestroy_Private - Destroy the stash
102: */
103: PetscErrorCode MatStashDestroy_Private(MatStash *stash)
104: {
105: PetscMatStashSpaceDestroy(&stash->space_head);
106: if (stash->ScatterDestroy) (*stash->ScatterDestroy)(stash);
108: stash->space = NULL;
110: PetscFree(stash->flg_v);
111: return 0;
112: }
114: /*
115: MatStashScatterEnd_Private - This is called as the final stage of
116: scatter. The final stages of message passing are done here, and
117: all the memory used for message passing is cleaned up. This
118: routine also resets the stash, and deallocates the memory used
119: for the stash. It also keeps track of the current memory usage
120: so that the same value can be used the next time through.
121: */
122: PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
123: {
124: (*stash->ScatterEnd)(stash);
125: return 0;
126: }
128: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
129: {
130: PetscInt nsends = stash->nsends, bs2, oldnmax, i;
131: MPI_Status *send_status;
133: for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
134: /* wait on sends */
135: if (nsends) {
136: PetscMalloc1(2 * nsends, &send_status);
137: MPI_Waitall(2 * nsends, stash->send_waits, send_status);
138: PetscFree(send_status);
139: }
141: /* Now update oldnmax to be approximately 10% more than the maximum n used, so that
142: the wastage of space is reduced the next time this stash is used.
143: oldnmax is only updated if it increases */
144: if (stash->n) {
145: bs2 = stash->bs * stash->bs;
146: oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
147: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
148: }
150: stash->nmax = 0;
151: stash->n = 0;
152: stash->reallocs = -1;
153: stash->nprocessed = 0;
155: PetscMatStashSpaceDestroy(&stash->space_head);
157: stash->space = NULL;
159: PetscFree(stash->send_waits);
160: PetscFree(stash->recv_waits);
161: PetscFree2(stash->svalues, stash->sindices);
162: PetscFree(stash->rvalues[0]);
163: PetscFree(stash->rvalues);
164: PetscFree(stash->rindices[0]);
165: PetscFree(stash->rindices);
166: return 0;
167: }
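/*
   Editorial note: worked example of the oldnmax heuristic above, assuming
   bs = 2 (so bs2 = 4) and stash->n = 100 stashed entries this assembly:

     oldnmax = ((int)(100 * 1.1) + 5) * 4 = (110 + 5) * 4 = 460

   so the next assembly starts with room for 460 scalars (115 blocks) before
   MatStashExpand_Private() has to reallocate.
*/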
169: /*
170: MatStashGetInfo_Private - Gets the relevant statistics of the stash
172: Input Parameter:
173: stash - the stash
174: Output Parameters:
175: nstash - the size of the stash, i.e. the number of values stored
176: reallocs - the number of additional mallocs incurred
177: */
178: PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
179: {
180: PetscInt bs2 = stash->bs * stash->bs;
182: if (nstash) *nstash = stash->n * bs2;
183: if (reallocs) {
184: if (stash->reallocs < 0) *reallocs = 0;
185: else *reallocs = stash->reallocs;
186: }
187: return 0;
188: }
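/*
   Editorial sketch: typical use of MatStashGetInfo_Private() from an assembly
   routine, e.g. inside a MatAssemblyBegin_XXX() implementation; the logging
   call and variable names are illustrative.

     PetscInt nstash, reallocs;

     MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs);
     PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
*/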
190: /*
191: MatStashSetInitialSize_Private - Sets the initial size of the stash
193: Input Parameters:
194: stash - the stash
195: max - the value used as the maximum size of the stash;
196: this value is used when allocating memory.
197: */
198: PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
199: {
200: stash->umax = max;
201: return 0;
202: }
204: /* MatStashExpand_Private - Expand the stash. This function is called
205: when the space in the stash is not sufficient to add the new values
206: being inserted into the stash.
208: Input Parameters:
209: stash - the stash
210: incr - the minimum increase requested
212: Note:
213: This routine doubles the currently used memory.
214: */
215: static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
216: {
217: PetscInt newnmax, bs2 = stash->bs * stash->bs;
219: /* allocate a larger stash */
220: if (!stash->oldnmax && !stash->nmax) { /* new stash */
221: if (stash->umax) newnmax = stash->umax / bs2;
222: else newnmax = DEFAULT_STASH_SIZE / bs2;
223: } else if (!stash->nmax) { /* reusing stash */
224: if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs2;
225: else newnmax = stash->oldnmax / bs2;
226: } else newnmax = stash->nmax * 2;
227: if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;
229: /* Get a MatStashSpace and attach it to stash */
230: PetscMatStashSpaceGet(bs2, newnmax, &stash->space);
231: if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
232: stash->space_head = stash->space;
233: }
235: stash->reallocs++;
236: stash->nmax = newnmax;
237: return 0;
238: }
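/*
   Editorial note: the sizing decisions above, traced for bs = 1 (bs2 = 1) and
   no user-specified size (umax = 0):

     brand new stash:        nmax = 0, oldnmax = 0    ->  newnmax = DEFAULT_STASH_SIZE = 10000
     reused after assembly:  nmax = 0, oldnmax = 460  ->  newnmax = 460
     growing mid-assembly:   nmax = 460               ->  newnmax = 920   (doubling)

   and in every case newnmax is bumped by 2*incr if it would still not hold the
   nmax + incr entries the caller needs.
*/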
239: /*
240: MatStashValuesRow_Private - inserts values into the stash. This function
241: expects the values to be row-oriented. Multiple columns belonging to the same row
242: can be inserted with a single call to this function.
244: Input Parameters:
245: stash - the stash
246: row - the global row corresponding to the values
247: n - the number of elements inserted. All elements belong to the above row.
248: idxn - the global column indices corresponding to each of the values.
249: values - the values inserted
250: */
251: PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
252: {
253: PetscInt i, k, cnt = 0;
254: PetscMatStashSpace space = stash->space;
256: /* Check and see if we have sufficient memory */
257: if (!space || space->local_remaining < n) MatStashExpand_Private(stash, n);
258: space = stash->space;
259: k = space->local_used;
260: for (i = 0; i < n; i++) {
261: if (ignorezeroentries && values && values[i] == 0.0) continue;
262: space->idx[k] = row;
263: space->idy[k] = idxn[i];
264: space->val[k] = values ? values[i] : 0.0;
265: k++;
266: cnt++;
267: }
268: stash->n += cnt;
269: space->local_used += cnt;
270: space->local_remaining -= cnt;
271: return 0;
272: }
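/*
   Editorial sketch: how a MatSetValues_XXX() implementation typically routes a
   row that is not locally owned to the stash. The ownership bounds rstart/rend,
   the row-oriented layout of v, and the other variable names are assumptions of
   this sketch.

     for (i = 0; i < m; i++) {                 // loop over the rows passed to MatSetValues()
       if (idxm[i] >= rstart && idxm[i] < rend) {
         // locally owned row: insert directly into the local storage
       } else {
         // off-process row: stash it; the values travel at MatAssemblyBegin/End time
         MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE);
       }
     }
*/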
274: /*
275: MatStashValuesCol_Private - inserts values into the stash. This function
276: expects the values to be column-oriented. Multiple columns belonging to the same row
277: can be inserted with a single call to this function.
279: Input Parameters:
280: stash - the stash
281: row - the global row corresponding to the values
282: n - the number of elements inserted. All elements belong to the above row.
283: idxn - the global column indices corresponding to each of the values.
284: values - the values inserted
285: stepval - consecutive values are separated by a distance of stepval;
286: this happens because the input is column-oriented.
287: */
288: PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
289: {
290: PetscInt i, k, cnt = 0;
291: PetscMatStashSpace space = stash->space;
293: /* Check and see if we have sufficient memory */
294: if (!space || space->local_remaining < n) MatStashExpand_Private(stash, n);
295: space = stash->space;
296: k = space->local_used;
297: for (i = 0; i < n; i++) {
298: if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
299: space->idx[k] = row;
300: space->idy[k] = idxn[i];
301: space->val[k] = values ? values[i * stepval] : 0.0;
302: k++;
303: cnt++;
304: }
305: stash->n += cnt;
306: space->local_used += cnt;
307: space->local_remaining -= cnt;
308: return 0;
309: }
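/*
   Editorial note: meaning of stepval. For a column-oriented m-by-n input array
   v, the n values belonging to row r are not contiguous; they are separated by
   the column length m, so a caller would stash row r as

     MatStashValuesCol_Private(&mat->stash, row, n, idxn, v + r, m, PETSC_FALSE);

   i.e. stepval = m, and the loop above reads v[r], v[r + m], ..., v[r + (n-1)*m].
*/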
311: /*
312: MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
313: This function expects the values to be row-oriented. Multiple block-columns belonging
314: to the same block-row can be inserted with a single call to this function.
315: This function extracts the sub-block of values based on the dimensions of
316: the original input block, and the row,col values corresponding to the blocks.
318: Input Parameters:
319: stash - the stash
320: row - the global block-row corresponding to the values
321: n - the number of elements inserted. All elements belong to the above row.
322: idxn - the global block-column indices corresponding to each of the blocks of
323: values. Each block is of size bs*bs.
324: values - the values inserted
325: rmax - the number of block-rows in the original block.
326: cmax - the number of block-columns in the original block.
327: idx - the index of the current block-row in the original block.
328: */
329: PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
330: {
331: PetscInt i, j, k, bs2, bs = stash->bs, l;
332: const PetscScalar *vals;
333: PetscScalar *array;
334: PetscMatStashSpace space = stash->space;
336: if (!space || space->local_remaining < n) MatStashExpand_Private(stash, n);
337: space = stash->space;
338: l = space->local_used;
339: bs2 = bs * bs;
340: for (i = 0; i < n; i++) {
341: space->idx[l] = row;
342: space->idy[l] = idxn[i];
343: /* Now copy over the block of values. Store the values column oriented.
344: This enables inserting multiple blocks belonging to a row with a single
345: function call */
346: array = space->val + bs2 * l;
347: vals = values + idx * bs2 * n + bs * i;
348: for (j = 0; j < bs; j++) {
349: for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
350: array++;
351: vals += cmax * bs;
352: }
353: l++;
354: }
355: stash->n += n;
356: space->local_used += n;
357: space->local_remaining -= n;
358: return 0;
359: }
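/*
   Editorial note: the pointer arithmetic above, worked out for bs = 2, n = 3
   block-columns, cmax = 3 and block-row index idx = 1, with values holding a
   row-oriented (rmax*bs) x (cmax*bs) array:

     vals = values + idx*bs2*n + bs*i = values + 12 + 2*i

   points at the top-left scalar of the i-th 2x2 block in the second block-row;
   each of the bs inner iterations then advances vals by cmax*bs = 6, i.e. one
   full scalar row of the input, while the block is written column-oriented
   into space->val.
*/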
361: /*
362: MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
363: This function expects the values to be column-oriented. Multiple block-columns belonging
364: to the same block-row can be inserted with a single call to this function.
365: This function extracts the sub-block of values based on the dimensions of
366: the original input block, and the row,col values corresponding to the blocks.
368: Input Parameters:
369: stash - the stash
370: row - the global block-row corresponding to the values
371: n - the number of elements inserted. All elements belong to the above row.
372: idxn - the global block-column indices corresponding to each of the blocks of
373: values. Each block is of size bs*bs.
374: values - the values inserted
375: rmax - the number of block-rows in the original block.
376: cmax - the number of block-columns in the original block.
377: idx - the index of the current block-row in the original block.
378: */
379: PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
380: {
381: PetscInt i, j, k, bs2, bs = stash->bs, l;
382: const PetscScalar *vals;
383: PetscScalar *array;
384: PetscMatStashSpace space = stash->space;
386: if (!space || space->local_remaining < n) MatStashExpand_Private(stash, n);
387: space = stash->space;
388: l = space->local_used;
389: bs2 = bs * bs;
390: for (i = 0; i < n; i++) {
391: space->idx[l] = row;
392: space->idy[l] = idxn[i];
393: /* Now copy over the block of values. Store the values column oriented.
394: This enables inserting multiple blocks belonging to a row with a single
395: function call */
396: array = space->val + bs2 * l;
397: vals = values + idx * bs2 * n + bs * i;
398: for (j = 0; j < bs; j++) {
399: for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
400: array += bs;
401: vals += rmax * bs;
402: }
403: l++;
404: }
405: stash->n += n;
406: space->local_used += n;
407: space->local_remaining -= n;
408: return 0;
409: }
410: /*
411: MatStashScatterBegin_Private - Initiates the transfer of values to the
412: correct owners. This function goes through the stash, checks the
413: owner of each stashed value, and sends the values off to the owner
414: processors.
416: Input Parameters:
417: stash - the stash
418: owners - an array of size 'no-of-procs' which gives the ownership range
419: for each node.
421: Note:
422: In the case of the blocked stash, the 'owners' array has the ranges
423: specified in blocked global indices; for the regular stash they are in
424: regular global indices.
425: */
426: PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
427: {
428: (*stash->ScatterBegin)(mat, stash, owners);
429: return 0;
430: }
432: static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
433: {
434: PetscInt *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
435: PetscInt size = stash->size, nsends;
436: PetscInt count, *sindices, **rindices, i, j, idx, lastidx, l;
437: PetscScalar **rvalues, *svalues;
438: MPI_Comm comm = stash->comm;
439: MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
440: PetscMPIInt *sizes, *nlengths, nreceives;
441: PetscInt *sp_idx, *sp_idy;
442: PetscScalar *sp_val;
443: PetscMatStashSpace space, space_next;
445: { /* make sure all processors are either in INSERTMODE or ADDMODE */
446: InsertMode addv;
447: MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat));
449: mat->insertmode = addv; /* in case this processor had no cache */
450: }
452: bs2 = stash->bs * stash->bs;
454: /* first count number of contributors to each processor */
455: PetscCalloc1(size, &nlengths);
456: PetscMalloc1(stash->n + 1, &owner);
458: i = j = 0;
459: lastidx = -1;
460: space = stash->space_head;
461: while (space) {
462: space_next = space->next;
463: sp_idx = space->idx;
464: for (l = 0; l < space->local_used; l++) {
465: /* if indices are NOT locally sorted, need to start search at the beginning */
466: if (lastidx > (idx = sp_idx[l])) j = 0;
467: lastidx = idx;
468: for (; j < size; j++) {
469: if (idx >= owners[j] && idx < owners[j + 1]) {
470: nlengths[j]++;
471: owner[i] = j;
472: break;
473: }
474: }
475: i++;
476: }
477: space = space_next;
478: }
480: /* Now check what procs get messages - and compute nsends. */
481: PetscCalloc1(size, &sizes);
482: for (i = 0, nsends = 0; i < size; i++) {
483: if (nlengths[i]) {
484: sizes[i] = 1;
485: nsends++;
486: }
487: }
489: {
490: PetscMPIInt *onodes, *olengths;
491: /* Determine the number of messages to expect, their lengths, and the from-ids */
492: PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives);
493: PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths);
494: /* row and col indices are sent together (clubbed), so lengths are multiplied by 2 */
495: for (i = 0; i < nreceives; i++) olengths[i] *= 2;
496: PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1);
497: /* values have length bs2 per entry (and remove the earlier factor of 2) */
498: for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
499: PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2);
500: PetscFree(onodes);
501: PetscFree(olengths);
502: }
504: /* do sends:
505: 1) startv[i] gives the starting index in svalues for values going to
506: the ith processor
507: */
508: PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices);
509: PetscMalloc1(2 * nsends, &send_waits);
510: PetscMalloc2(size, &startv, size, &starti);
511: /* use 2 sends per destination: one with all the values (all_a) and one with all the indices (all_i and all_j) */
512: startv[0] = 0;
513: starti[0] = 0;
514: for (i = 1; i < size; i++) {
515: startv[i] = startv[i - 1] + nlengths[i - 1];
516: starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
517: }
519: i = 0;
520: space = stash->space_head;
521: while (space) {
522: space_next = space->next;
523: sp_idx = space->idx;
524: sp_idy = space->idy;
525: sp_val = space->val;
526: for (l = 0; l < space->local_used; l++) {
527: j = owner[i];
528: if (bs2 == 1) {
529: svalues[startv[j]] = sp_val[l];
530: } else {
531: PetscInt k;
532: PetscScalar *buf1, *buf2;
533: buf1 = svalues + bs2 * startv[j];
534: buf2 = space->val + bs2 * l;
535: for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
536: }
537: sindices[starti[j]] = sp_idx[l];
538: sindices[starti[j] + nlengths[j]] = sp_idy[l];
539: startv[j]++;
540: starti[j]++;
541: i++;
542: }
543: space = space_next;
544: }
545: startv[0] = 0;
546: for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
548: for (i = 0, count = 0; i < size; i++) {
549: if (sizes[i]) {
550: MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++);
551: MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++);
552: }
553: }
554: #if defined(PETSC_USE_INFO)
555: PetscInfo(NULL, "No of messages: %" PetscInt_FMT " \n", nsends);
556: for (i = 0; i < size; i++) {
557: if (sizes[i]) PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt))));
558: }
559: #endif
560: PetscFree(nlengths);
561: PetscFree(owner);
562: PetscFree2(startv, starti);
563: PetscFree(sizes);
565: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
566: PetscMalloc1(2 * nreceives, &recv_waits);
568: for (i = 0; i < nreceives; i++) {
569: recv_waits[2 * i] = recv_waits1[i];
570: recv_waits[2 * i + 1] = recv_waits2[i];
571: }
572: stash->recv_waits = recv_waits;
574: PetscFree(recv_waits1);
575: PetscFree(recv_waits2);
577: stash->svalues = svalues;
578: stash->sindices = sindices;
579: stash->rvalues = rvalues;
580: stash->rindices = rindices;
581: stash->send_waits = send_waits;
582: stash->nsends = nsends;
583: stash->nrecvs = nreceives;
584: stash->reproduce_count = 0;
585: return 0;
586: }
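/*
   Editorial note: layout of the two messages sent to a destination rank j by
   the reference scatter above, shown for nlengths[j] = 3 stashed entries and
   bs2 = 1:

     tag1 (MPIU_INT,    length 2*3):  [ row0 row1 row2 | col0 col1 col2 ]
     tag2 (MPIU_SCALAR, length 1*3):  [ val0 val1 val2 ]

   The index message packs all rows first and all columns second (see
   sindices[starti[j]] and sindices[starti[j] + nlengths[j]] above), while the
   value message carries bs2 scalars per stashed entry.
*/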
588: /*
589: MatStashScatterGetMesg_Private - This function waits on the receives posted
590: in the function MatStashScatterBegin_Private() and returns one message at
591: a time to the calling function. If no messages are left, it indicates this
592: by setting flg = 0, else it sets flg = 1.
594: Input Parameters:
595: stash - the stash
597: Output Parameters:
598: nvals - the number of entries in the current message.
599: rows - an array of row indices (or blocked indices) corresponding to the values
600: cols - an array of column indices (or blocked indices) corresponding to the values
601: vals - the values
602: flg - 0 indicates no more messages are left, and the current call has no values associated;
603: 1 indicates that the current call successfully received a message, and the
604: other output parameters nvals, rows, cols, and vals are set appropriately.
605: */
606: PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
607: {
608: (*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg);
609: return 0;
610: }
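/*
   Editorial sketch: the usual consumer loop around the three scatter entry
   points, roughly as it appears in the MatAssemblyBegin_XXX()/MatAssemblyEnd_XXX()
   routines of the MPI matrix types. It assumes bs = 1 and uses illustrative
   names; real implementations insert into their local data structures instead
   of calling MatSetValues() entry by entry.

     PetscMPIInt  n;
     PetscInt    *row, *col, flg, i;
     PetscScalar *val;

     MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range);  // in MatAssemblyBegin
     while (1) {                                                        // in MatAssemblyEnd
       MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg);
       if (!flg) break;                                                 // all messages processed
       for (i = 0; i < n; i++) MatSetValues(mat, 1, row + i, 1, col + i, val + i, mat->insertmode);
     }
     MatStashScatterEnd_Private(&mat->stash);
*/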
612: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
613: {
614: PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
615: PetscInt bs2;
616: MPI_Status recv_status;
617: PetscBool match_found = PETSC_FALSE;
619: *flg = 0; /* When a message is discovered this is reset to 1 */
620: /* Return if no more messages to process */
621: if (stash->nprocessed == stash->nrecvs) return 0;
623: bs2 = stash->bs * stash->bs;
624: /* If a matching pair of receives is found, process them and return the data to
625: the calling function. Until then, keep receiving messages */
626: while (!match_found) {
627: if (stash->reproduce) {
628: i = stash->reproduce_count++;
629: MPI_Wait(stash->recv_waits + i, &recv_status);
630: } else {
631: MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status);
632: }
635: /* Now pack the received message into a structure which is usable by others */
636: if (i % 2) {
637: MPI_Get_count(&recv_status, MPIU_SCALAR, nvals);
638: flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
639: *nvals = *nvals / bs2;
640: } else {
641: MPI_Get_count(&recv_status, MPIU_INT, nvals);
642: flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
643: *nvals = *nvals / 2; /* This message has both row indices and col indices */
644: }
646: /* Check if we have both messages from this proc */
647: i1 = flg_v[2 * recv_status.MPI_SOURCE];
648: i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
649: if (i1 != -1 && i2 != -1) {
650: *rows = stash->rindices[i2];
651: *cols = *rows + *nvals;
652: *vals = stash->rvalues[i1];
653: *flg = 1;
654: stash->nprocessed++;
655: match_found = PETSC_TRUE;
656: }
657: }
658: return 0;
659: }
661: #if !defined(PETSC_HAVE_MPIUNI)
662: typedef struct {
663: PetscInt row;
664: PetscInt col;
665: PetscScalar vals[1]; /* Actually an array of length bs2 */
666: } MatStashBlock;
668: static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
669: {
670: PetscMatStashSpace space;
671: PetscInt n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
672: PetscScalar **valptr;
674: PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm);
675: for (space = stash->space_head, cnt = 0; space; space = space->next) {
676: for (i = 0; i < space->local_used; i++) {
677: row[cnt] = space->idx[i];
678: col[cnt] = space->idy[i];
679: valptr[cnt] = &space->val[i * bs2];
680: perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
681: cnt++;
682: }
683: }
685: PetscSortIntWithArrayPair(n, row, col, perm);
686: /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
687: for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
688: if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
689: PetscInt colstart;
690: PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]);
691: for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
692: PetscInt j, l;
693: MatStashBlock *block;
694: PetscSegBufferGet(stash->segsendblocks, 1, &block);
695: block->row = row[rowstart];
696: block->col = col[colstart];
697: PetscArraycpy(block->vals, valptr[perm[colstart]], bs2);
698: for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
699: if (insertmode == ADD_VALUES) {
700: for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
701: } else {
702: PetscArraycpy(block->vals, valptr[perm[j]], bs2);
703: }
704: }
705: colstart = j;
706: }
707: rowstart = i;
708: }
709: }
710: PetscFree4(row, col, valptr, perm);
711: return 0;
712: }
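/*
   Editorial note: a small trace of the sort/compress pass above for bs2 = 1.
   Suppose the stash holds, in insertion order,

     (row,col,val) = (5,2,1.0), (3,7,2.0), (5,2,0.5), (3,1,4.0)

   After the row sort and the per-row column sort the order is
   (3,1), (3,7), (5,2), (5,2); the duplicate (5,2) entries are combined, so
   segsendblocks receives three blocks:

     (3,1,4.0), (3,7,2.0), (5,2,1.5)   with ADD_VALUES

   With INSERT_VALUES the duplicate block instead keeps only one of the two
   stashed values (whichever sorts last).
*/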
714: static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
715: {
716: if (stash->blocktype == MPI_DATATYPE_NULL) {
717: PetscInt bs2 = PetscSqr(stash->bs);
718: PetscMPIInt blocklens[2];
719: MPI_Aint displs[2];
720: MPI_Datatype types[2], stype;
721: /*
722: DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
723: std::complex itself has standard layout, so does DummyBlock, recursively.
724: To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
726: though they can have different alignment, e.g., 16 bytes for double complex instead of 8 bytes as in GCC's libstdc++.
726: offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.
728: We can test if std::complex has standard layout with the following code:
729: #include <iostream>
730: #include <type_traits>
731: #include <complex>
732: int main() {
733: std::cout << std::boolalpha;
734: std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
735: }
736: Output: true
737: */
738: struct DummyBlock {
739: PetscInt row, col;
740: PetscScalar vals;
741: };
743: stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
744: if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
745: stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
746: }
747: PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks);
748: PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks);
749: PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe);
750: blocklens[0] = 2;
751: blocklens[1] = bs2;
752: displs[0] = offsetof(struct DummyBlock, row);
753: displs[1] = offsetof(struct DummyBlock, vals);
754: types[0] = MPIU_INT;
755: types[1] = MPIU_SCALAR;
756: MPI_Type_create_struct(2, blocklens, displs, types, &stype);
757: MPI_Type_commit(&stype);
758: MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype);
759: MPI_Type_commit(&stash->blocktype);
760: MPI_Type_free(&stype);
761: }
762: return 0;
763: }
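/*
   Editorial note: the blocktype_size arithmetic above, worked out for two
   common configurations (real PetscScalar):

     8-byte PetscInt, double, bs = 2:  offsetof(vals) = 16, size = 16 + 4*8 = 48  (48 % 8 == 0, no padding)
     4-byte PetscInt, double, bs = 1:  offsetof(vals) =  8, size =  8 + 1*8 = 16  (16 % 4 == 0, no padding)

   Padding is only appended when the size is not a multiple of sizeof(PetscInt),
   e.g. with 8-byte PetscInt and single-precision scalars, so that the row/col
   members of consecutive blocks in the send buffer stay properly aligned.
*/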
765: /* Callback invoked after target rank has initiated receive of rendezvous message.
766: * Here we post the main sends.
767: */
768: static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
769: {
770: MatStash *stash = (MatStash *)ctx;
771: MatStashHeader *hdr = (MatStashHeader *)sdata;
774: MPI_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]);
775: stash->sendframes[rankid].count = hdr->count;
776: stash->sendframes[rankid].pending = 1;
777: return 0;
778: }
780: /*
781: Callback invoked by target after receiving rendezvous message.
782: Here we post the main recvs.
783: */
784: static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
785: {
786: MatStash *stash = (MatStash *)ctx;
787: MatStashHeader *hdr = (MatStashHeader *)rdata;
788: MatStashFrame *frame;
790: PetscSegBufferGet(stash->segrecvframe, 1, &frame);
791: PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer);
792: MPI_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]);
793: frame->count = hdr->count;
794: frame->pending = 1;
795: return 0;
796: }
798: /*
799: * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
800: */
801: static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
802: {
803: size_t nblocks;
804: char *sendblocks;
806: if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
807: InsertMode addv;
808: MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat));
810: }
812: MatStashBlockTypeSetUp(stash);
813: MatStashSortCompress_Private(stash, mat->insertmode);
814: PetscSegBufferGetSize(stash->segsendblocks, &nblocks);
815: PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks);
816: if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */
817: PetscInt i;
818: size_t b;
819: for (i = 0, b = 0; i < stash->nsendranks; i++) {
820: stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
821: /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
822: stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
823: for (; b < nblocks; b++) {
824: MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
826: if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
827: stash->sendhdr[i].count++;
828: }
829: }
830: } else { /* Dynamically count and pack (first time) */
831: PetscInt sendno;
832: size_t i, rowstart;
834: /* Count number of send ranks and allocate for sends */
835: stash->nsendranks = 0;
836: for (rowstart = 0; rowstart < nblocks;) {
837: PetscInt owner;
838: MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
839: PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner);
840: if (owner < 0) owner = -(owner + 2);
841: for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
842: MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
843: if (sendblock_i->row >= owners[owner + 1]) break;
844: }
845: stash->nsendranks++;
846: rowstart = i;
847: }
848: PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes);
850: /* Set up sendhdrs and sendframes */
851: sendno = 0;
852: for (rowstart = 0; rowstart < nblocks;) {
853: PetscInt owner;
854: MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
855: PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner);
856: if (owner < 0) owner = -(owner + 2);
857: stash->sendranks[sendno] = owner;
858: for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
859: MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
860: if (sendblock_i->row >= owners[owner + 1]) break;
861: }
862: stash->sendframes[sendno].buffer = sendblock_rowstart;
863: stash->sendframes[sendno].pending = 0;
864: stash->sendhdr[sendno].count = i - rowstart;
865: sendno++;
866: rowstart = i;
867: }
869: }
871: /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
872: * message or a dummy entry of some sort. */
873: if (mat->insertmode == INSERT_VALUES) {
874: size_t i;
875: for (i = 0; i < nblocks; i++) {
876: MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
877: sendblock_i->row = -(sendblock_i->row + 1);
878: }
879: }
881: if (stash->first_assembly_done) {
882: PetscMPIInt i, tag;
883: PetscCommGetNewTag(stash->comm, &tag);
884: for (i = 0; i < stash->nrecvranks; i++) MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash);
885: for (i = 0; i < stash->nsendranks; i++) MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash);
886: stash->use_status = PETSC_TRUE; /* Use count from message status. */
887: } else {
888: PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash);
889: PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses);
890: stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
891: }
893: PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes);
894: stash->recvframe_active = NULL;
895: stash->recvframe_i = 0;
896: stash->some_i = 0;
897: stash->some_count = 0;
898: stash->recvcount = 0;
899: stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
900: stash->insertmode = &mat->insertmode;
901: return 0;
902: }
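/*
   Editorial note: the INSERT_VALUES encoding used above maps each
   (non-negative) row index r to -(r + 1), which is always negative and
   invertible, e.g. r = 0 -> -1 and r = 41 -> -42. The receiving side
   (MatStashScatterGetMesg_BTS below) looks at the sign of the first row of a
   message to recover the InsertMode and then decodes every row with
   row = -(row + 1) before handing it back to the caller.
*/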
904: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
905: {
906: MatStashBlock *block;
908: *flg = 0;
909: while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
910: if (stash->some_i == stash->some_count) {
911: if (stash->recvcount == stash->nrecvranks) return 0; /* Done */
912: MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE);
913: stash->some_i = 0;
914: }
915: stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
916: stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
917: if (stash->use_status) { /* Count what was actually sent */
918: MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &stash->recvframe_count);
919: }
920: if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
921: block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
922: if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
925: }
926: stash->some_i++;
927: stash->recvcount++;
928: stash->recvframe_i = 0;
929: }
930: *n = 1;
931: block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
932: if (block->row < 0) block->row = -(block->row + 1);
933: *row = &block->row;
934: *col = &block->col;
935: *val = block->vals;
936: stash->recvframe_i++;
937: *flg = 1;
938: return 0;
939: }
941: static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
942: {
943: MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE);
944: if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
945: PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL);
946: } else { /* No reuse, so collect everything. */
947: MatStashScatterDestroy_BTS(stash);
948: }
950: /* Now update oldnmax to be approximately 10% more than the maximum n used, so that
951: the wastage of space is reduced the next time this stash is used.
952: oldnmax is only updated if it increases */
953: if (stash->n) {
954: PetscInt bs2 = stash->bs * stash->bs;
955: PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
956: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
957: }
959: stash->nmax = 0;
960: stash->n = 0;
961: stash->reallocs = -1;
962: stash->nprocessed = 0;
964: PetscMatStashSpaceDestroy(&stash->space_head);
966: stash->space = NULL;
968: return 0;
969: }
971: PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
972: {
973: PetscSegBufferDestroy(&stash->segsendblocks);
974: PetscSegBufferDestroy(&stash->segrecvframe);
975: stash->recvframes = NULL;
976: PetscSegBufferDestroy(&stash->segrecvblocks);
977: if (stash->blocktype != MPI_DATATYPE_NULL) MPI_Type_free(&stash->blocktype);
978: stash->nsendranks = 0;
979: stash->nrecvranks = 0;
980: PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes);
981: PetscFree(stash->sendreqs);
982: PetscFree(stash->recvreqs);
983: PetscFree(stash->recvranks);
984: PetscFree(stash->recvhdr);
985: PetscFree2(stash->some_indices, stash->some_statuses);
986: return 0;
987: }
988: #endif