Actual source code: vecstash.c
2: #include <petsc/private/vecimpl.h>
4: #define DEFAULT_STASH_SIZE 100
6: /*
7: VecStashCreate_Private - Creates a stash,currently used for all the parallel
8: matrix implementations. The stash is where elements of a matrix destined
9: to be stored on other processors are kept until matrix assembly is done.
11: This is a simple minded stash. Simply adds entries to end of stash.
13: Input Parameters:
14: comm - communicator, required for scatters.
15: bs - stash block size. used when stashing blocks of values
17: Output Parameters:
18: stash - the newly created stash
19: */
20: PetscErrorCode VecStashCreate_Private(MPI_Comm comm, PetscInt bs, VecStash *stash)
21: {
22: PetscInt max, *opt, nopt;
23: PetscBool flg;
25: /* Require 2 tags, get the second using PetscCommGetNewTag() */
26: stash->comm = comm;
27: PetscCommGetNewTag(stash->comm, &stash->tag1);
28: PetscCommGetNewTag(stash->comm, &stash->tag2);
29: MPI_Comm_size(stash->comm, &stash->size);
30: MPI_Comm_rank(stash->comm, &stash->rank);
32: nopt = stash->size;
33: PetscMalloc1(nopt, &opt);
34: PetscOptionsGetIntArray(NULL, NULL, "-vecstash_initial_size", opt, &nopt, &flg);
35: if (flg) {
36: if (nopt == 1) max = opt[0];
37: else if (nopt == stash->size) max = opt[stash->rank];
38: else if (stash->rank < nopt) max = opt[stash->rank];
39: else max = 0; /* use default */
40: stash->umax = max;
41: } else {
42: stash->umax = 0;
43: }
44: PetscFree(opt);
46: if (bs <= 0) bs = 1;
48: stash->bs = bs;
49: stash->nmax = 0;
50: stash->oldnmax = 0;
51: stash->n = 0;
52: stash->reallocs = -1;
53: stash->idx = NULL;
54: stash->array = NULL;
56: stash->send_waits = NULL;
57: stash->recv_waits = NULL;
58: stash->send_status = NULL;
59: stash->nsends = 0;
60: stash->nrecvs = 0;
61: stash->svalues = NULL;
62: stash->rvalues = NULL;
63: stash->rmax = 0;
64: stash->nprocs = NULL;
65: stash->nprocessed = 0;
66: stash->donotstash = PETSC_FALSE;
67: stash->ignorenegidx = PETSC_FALSE;
68: return 0;
69: }
71: /*
72: VecStashDestroy_Private - Destroy the stash
73: */
74: PetscErrorCode VecStashDestroy_Private(VecStash *stash)
75: {
76: PetscFree2(stash->array, stash->idx);
77: PetscFree(stash->bowners);
78: return 0;
79: }
81: /*
82: VecStashScatterEnd_Private - This is called as the fial stage of
83: scatter. The final stages of message passing is done here, and
84: all the memory used for message passing is cleanedu up. This
85: routine also resets the stash, and deallocates the memory used
86: for the stash. It also keeps track of the current memory usage
87: so that the same value can be used the next time through.
88: */
89: PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
90: {
91: PetscInt nsends = stash->nsends, oldnmax;
92: MPI_Status *send_status;
94: /* wait on sends */
95: if (nsends) {
96: PetscMalloc1(2 * nsends, &send_status);
97: MPI_Waitall(2 * nsends, stash->send_waits, send_status);
98: PetscFree(send_status);
99: }
101: /* Now update nmaxold to be app 10% more than max n, this way the
102: wastage of space is reduced the next time this stash is used.
103: Also update the oldmax, only if it increases */
104: if (stash->n) {
105: oldnmax = ((PetscInt)(stash->n * 1.1) + 5) * stash->bs;
106: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
107: }
109: stash->nmax = 0;
110: stash->n = 0;
111: stash->reallocs = -1;
112: stash->rmax = 0;
113: stash->nprocessed = 0;
115: PetscFree2(stash->array, stash->idx);
116: stash->array = NULL;
117: stash->idx = NULL;
118: PetscFree(stash->send_waits);
119: PetscFree(stash->recv_waits);
120: PetscFree2(stash->svalues, stash->sindices);
121: PetscFree2(stash->rvalues, stash->rindices);
122: PetscFree(stash->nprocs);
123: return 0;
124: }
126: /*
127: VecStashGetInfo_Private - Gets the relevant statistics of the stash
129: Input Parameters:
130: stash - the stash
131: nstash - the size of the stash
132: reallocs - the number of additional mallocs incurred.
134: */
135: PetscErrorCode VecStashGetInfo_Private(VecStash *stash, PetscInt *nstash, PetscInt *reallocs)
136: {
137: if (nstash) *nstash = stash->n * stash->bs;
138: if (reallocs) {
139: if (stash->reallocs < 0) *reallocs = 0;
140: else *reallocs = stash->reallocs;
141: }
142: return 0;
143: }
145: /*
146: VecStashSetInitialSize_Private - Sets the initial size of the stash
148: Input Parameters:
149: stash - the stash
150: max - the value that is used as the max size of the stash.
151: this value is used while allocating memory. It specifies
152: the number of vals stored, even with the block-stash
153: */
154: PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash, PetscInt max)
155: {
156: stash->umax = max;
157: return 0;
158: }
160: /* VecStashExpand_Private - Expand the stash. This function is called
161: when the space in the stash is not sufficient to add the new values
162: being inserted into the stash.
164: Input Parameters:
165: stash - the stash
166: incr - the minimum increase requested
168: Notes:
169: This routine doubles the currently used memory.
170: */
171: PetscErrorCode VecStashExpand_Private(VecStash *stash, PetscInt incr)
172: {
173: PetscInt *n_idx, newnmax, bs = stash->bs;
174: PetscScalar *n_array;
176: /* allocate a larger stash. */
177: if (!stash->oldnmax && !stash->nmax) { /* new stash */
178: if (stash->umax) newnmax = stash->umax / bs;
179: else newnmax = DEFAULT_STASH_SIZE / bs;
180: } else if (!stash->nmax) { /* resuing stash */
181: if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs;
182: else newnmax = stash->oldnmax / bs;
183: } else newnmax = stash->nmax * 2;
185: if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;
187: PetscMalloc2(bs * newnmax, &n_array, newnmax, &n_idx);
188: PetscMemcpy(n_array, stash->array, bs * stash->nmax * sizeof(PetscScalar));
189: PetscMemcpy(n_idx, stash->idx, stash->nmax * sizeof(PetscInt));
190: PetscFree2(stash->array, stash->idx);
192: stash->array = n_array;
193: stash->idx = n_idx;
194: stash->nmax = newnmax;
195: stash->reallocs++;
196: return 0;
197: }
198: /*
199: VecStashScatterBegin_Private - Initiates the transfer of values to the
200: correct owners. This function goes through the stash, and check the
201: owners of each stashed value, and sends the values off to the owner
202: processors.
204: Input Parameters:
205: stash - the stash
206: owners - an array of size 'no-of-procs' which gives the ownership range
207: for each node.
209: Notes:
210: The 'owners' array in the cased of the blocked-stash has the
211: ranges specified blocked global indices, and for the regular stash in
212: the proper global indices.
213: */
214: PetscErrorCode VecStashScatterBegin_Private(VecStash *stash, PetscInt *owners)
215: {
216: PetscMPIInt size = stash->size, tag1 = stash->tag1, tag2 = stash->tag2;
217: PetscInt *owner, *start, *nprocs, nsends, nreceives;
218: PetscInt nmax, count, *sindices, *rindices, i, j, idx, bs = stash->bs, lastidx;
219: PetscScalar *rvalues, *svalues;
220: MPI_Comm comm = stash->comm;
221: MPI_Request *send_waits, *recv_waits;
223: /* first count number of contributors to each processor */
224: PetscCalloc1(2 * size, &nprocs);
225: PetscMalloc1(stash->n, &owner);
227: j = 0;
228: lastidx = -1;
229: for (i = 0; i < stash->n; i++) {
230: /* if indices are NOT locally sorted, need to start search at the beginning */
231: if (lastidx > (idx = stash->idx[i])) j = 0;
232: lastidx = idx;
233: for (; j < size; j++) {
234: if (idx >= owners[j] && idx < owners[j + 1]) {
235: nprocs[2 * j]++;
236: nprocs[2 * j + 1] = 1;
237: owner[i] = j;
238: break;
239: }
240: }
241: }
242: nsends = 0;
243: for (i = 0; i < size; i++) nsends += nprocs[2 * i + 1];
245: /* inform other processors of number of messages and max length*/
246: PetscMaxSum(comm, nprocs, &nmax, &nreceives);
248: /* post receives:
249: since we don't know how long each individual message is we
250: allocate the largest needed buffer for each receive. Potentially
251: this is a lot of wasted space.
252: */
253: PetscMalloc2(nreceives * nmax * bs, &rvalues, nreceives * nmax, &rindices);
254: PetscMalloc1(2 * nreceives, &recv_waits);
255: for (i = 0, count = 0; i < nreceives; i++) {
256: MPI_Irecv(rvalues + bs * nmax * i, bs * nmax, MPIU_SCALAR, MPI_ANY_SOURCE, tag1, comm, recv_waits + count++);
257: MPI_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag2, comm, recv_waits + count++);
258: }
260: /* do sends:
261: 1) starts[i] gives the starting index in svalues for stuff going to
262: the ith processor
263: */
264: PetscMalloc2(stash->n * bs, &svalues, stash->n, &sindices);
265: PetscMalloc1(2 * nsends, &send_waits);
266: PetscMalloc1(size, &start);
267: /* use 2 sends the first with all_v, the next with all_i */
268: start[0] = 0;
269: for (i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];
271: for (i = 0; i < stash->n; i++) {
272: j = owner[i];
273: if (bs == 1) svalues[start[j]] = stash->array[i];
274: else PetscMemcpy(svalues + bs * start[j], stash->array + bs * i, bs * sizeof(PetscScalar));
275: sindices[start[j]] = stash->idx[i];
276: start[j]++;
277: }
278: start[0] = 0;
279: for (i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];
281: for (i = 0, count = 0; i < size; i++) {
282: if (nprocs[2 * i + 1]) {
283: MPI_Isend(svalues + bs * start[i], bs * nprocs[2 * i], MPIU_SCALAR, i, tag1, comm, send_waits + count++);
284: MPI_Isend(sindices + start[i], nprocs[2 * i], MPIU_INT, i, tag2, comm, send_waits + count++);
285: }
286: }
287: PetscFree(owner);
288: PetscFree(start);
289: /* This memory is reused in scatter end for a different purpose*/
290: for (i = 0; i < 2 * size; i++) nprocs[i] = -1;
292: stash->nprocs = nprocs;
293: stash->svalues = svalues;
294: stash->sindices = sindices;
295: stash->rvalues = rvalues;
296: stash->rindices = rindices;
297: stash->nsends = nsends;
298: stash->nrecvs = nreceives;
299: stash->send_waits = send_waits;
300: stash->recv_waits = recv_waits;
301: stash->rmax = nmax;
302: return 0;
303: }
305: /*
306: VecStashScatterGetMesg_Private - This function waits on the receives posted
307: in the function VecStashScatterBegin_Private() and returns one message at
308: a time to the calling function. If no messages are left, it indicates this
309: by setting flg = 0, else it sets flg = 1.
311: Input Parameters:
312: stash - the stash
314: Output Parameters:
315: nvals - the number of entries in the current message.
316: rows - an array of row indices (or blocked indices) corresponding to the values
317: cols - an array of columnindices (or blocked indices) corresponding to the values
318: vals - the values
319: flg - 0 indicates no more message left, and the current call has no values associated.
320: 1 indicates that the current call successfully received a message, and the
321: other output parameters nvals,rows,cols,vals are set appropriately.
322: */
323: PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscScalar **vals, PetscInt *flg)
324: {
325: PetscMPIInt i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
326: PetscInt *flg_v;
327: PetscInt i1, i2, bs = stash->bs;
328: MPI_Status recv_status;
329: PetscBool match_found = PETSC_FALSE;
331: *flg = 0; /* When a message is discovered this is reset to 1 */
332: /* Return if no more messages to process */
333: if (stash->nprocessed == stash->nrecvs) return 0;
335: flg_v = stash->nprocs;
336: /* If a matching pair of receives are found, process them, and return the data to
337: the calling function. Until then keep receiving messages */
338: while (!match_found) {
339: MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status);
340: /* Now pack the received message into a structure which is useable by others */
341: if (i % 2) {
342: MPI_Get_count(&recv_status, MPIU_INT, nvals);
343: flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
344: } else {
345: MPI_Get_count(&recv_status, MPIU_SCALAR, nvals);
346: flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
347: *nvals = *nvals / bs;
348: }
350: /* Check if we have both the messages from this proc */
351: i1 = flg_v[2 * recv_status.MPI_SOURCE];
352: i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
353: if (i1 != -1 && i2 != -1) {
354: *rows = stash->rindices + i2 * stash->rmax;
355: *vals = stash->rvalues + i1 * bs * stash->rmax;
356: *flg = 1;
357: stash->nprocessed++;
358: match_found = PETSC_TRUE;
359: }
360: }
361: return 0;
362: }
364: /*
365: * Sort the stash, removing duplicates (combining as appropriate).
366: */
367: PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
368: {
369: PetscInt i, j, bs = stash->bs;
371: if (!stash->n) return 0;
372: if (bs == 1) {
373: PetscSortIntWithScalarArray(stash->n, stash->idx, stash->array);
374: for (i = 1, j = 0; i < stash->n; i++) {
375: if (stash->idx[i] == stash->idx[j]) {
376: switch (stash->insertmode) {
377: case ADD_VALUES:
378: stash->array[j] += stash->array[i];
379: break;
380: case INSERT_VALUES:
381: stash->array[j] = stash->array[i];
382: break;
383: default:
384: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
385: }
386: } else {
387: j++;
388: stash->idx[j] = stash->idx[i];
389: stash->array[j] = stash->array[i];
390: }
391: }
392: stash->n = j + 1;
393: } else { /* block stash */
394: PetscInt *perm = NULL;
395: PetscScalar *arr;
396: PetscMalloc2(stash->n, &perm, stash->n * bs, &arr);
397: for (i = 0; i < stash->n; i++) perm[i] = i;
398: PetscSortIntWithArray(stash->n, stash->idx, perm);
400: /* Out-of-place copy of arr */
401: PetscMemcpy(arr, stash->array + perm[0] * bs, bs * sizeof(PetscScalar));
402: for (i = 1, j = 0; i < stash->n; i++) {
403: PetscInt k;
404: if (stash->idx[i] == stash->idx[j]) {
405: switch (stash->insertmode) {
406: case ADD_VALUES:
407: for (k = 0; k < bs; k++) arr[j * bs + k] += stash->array[perm[i] * bs + k];
408: break;
409: case INSERT_VALUES:
410: for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
411: break;
412: default:
413: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
414: }
415: } else {
416: j++;
417: stash->idx[j] = stash->idx[i];
418: for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
419: }
420: }
421: stash->n = j + 1;
422: PetscMemcpy(stash->array, arr, stash->n * bs * sizeof(PetscScalar));
423: PetscFree2(perm, arr);
424: }
425: return 0;
426: }
428: PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash, PetscLayout map, PetscMPIInt *nowners, PetscMPIInt **owners)
429: {
430: PetscInt i, bs = stash->bs;
431: PetscMPIInt r;
432: PetscSegBuffer seg;
435: PetscSegBufferCreate(sizeof(PetscMPIInt), 50, &seg);
436: *nowners = 0;
437: for (i = 0, r = -1; i < stash->n; i++) {
438: if (stash->idx[i] * bs >= map->range[r + 1]) {
439: PetscMPIInt *rank;
440: PetscSegBufferGet(seg, 1, &rank);
441: PetscLayoutFindOwner(map, stash->idx[i] * bs, &r);
442: *rank = r;
443: (*nowners)++;
444: }
445: }
446: PetscSegBufferExtractAlloc(seg, owners);
447: PetscSegBufferDestroy(&seg);
448: return 0;
449: }