Actual source code: vecstash.c


  2: #include <petsc/private/vecimpl.h>

  4: #define DEFAULT_STASH_SIZE 100

  6: /*
  7:   VecStashCreate_Private - Creates a stash, currently used by all the parallel
  8:   vector implementations. The stash is where elements of a vector destined
  9:   to be stored on other processors are kept until vector assembly is done.

 11:   This is a simple-minded stash; it simply adds entries to the end of the stash.

 13:   Input Parameters:
 14:   comm - communicator, required for scatters.
 15:   bs   - stash block size, used when stashing blocks of values

 17:   Output Parameters:
 18:   stash    - the newly created stash
 19: */
 20: PetscErrorCode VecStashCreate_Private(MPI_Comm comm, PetscInt bs, VecStash *stash)
 21: {
 22:   PetscInt  max, *opt, nopt;
 23:   PetscBool flg;

 25:   /* Require 2 tags, get the second using PetscCommGetNewTag() */
 26:   stash->comm = comm;
 27:   PetscCommGetNewTag(stash->comm, &stash->tag1);
 28:   PetscCommGetNewTag(stash->comm, &stash->tag2);
 29:   MPI_Comm_size(stash->comm, &stash->size);
 30:   MPI_Comm_rank(stash->comm, &stash->rank);

 32:   nopt = stash->size;
 33:   PetscMalloc1(nopt, &opt);
 34:   PetscOptionsGetIntArray(NULL, NULL, "-vecstash_initial_size", opt, &nopt, &flg);
 35:   if (flg) {
 36:     if (nopt == 1) max = opt[0];
 37:     else if (nopt == stash->size) max = opt[stash->rank];
 38:     else if (stash->rank < nopt) max = opt[stash->rank];
 39:     else max = 0; /* use default */
 40:     stash->umax = max;
 41:   } else {
 42:     stash->umax = 0;
 43:   }
 44:   PetscFree(opt);

 46:   if (bs <= 0) bs = 1;

 48:   stash->bs       = bs;
 49:   stash->nmax     = 0;
 50:   stash->oldnmax  = 0;
 51:   stash->n        = 0;
 52:   stash->reallocs = -1;
 53:   stash->idx      = NULL;
 54:   stash->array    = NULL;

 56:   stash->send_waits   = NULL;
 57:   stash->recv_waits   = NULL;
 58:   stash->send_status  = NULL;
 59:   stash->nsends       = 0;
 60:   stash->nrecvs       = 0;
 61:   stash->svalues      = NULL;
 62:   stash->rvalues      = NULL;
 63:   stash->rmax         = 0;
 64:   stash->nprocs       = NULL;
 65:   stash->nprocessed   = 0;
 66:   stash->donotstash   = PETSC_FALSE;
 67:   stash->ignorenegidx = PETSC_FALSE;
 68:   return 0;
 69: }
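
/*
   Illustrative sketch (not part of vecstash.c): how a parallel vector
   implementation might create a scalar (bs == 1) stash up front and preset its
   size. The function name and the size of 1000 are hypothetical; the
   declarations are assumed to come from the included petsc/private/vecimpl.h.
*/
static PetscErrorCode ExampleCreateStash_Sketch(MPI_Comm comm, VecStash *stash)
{
  VecStashCreate_Private(comm, 1, stash);      /* bs = 1: stash individual (non-blocked) values */
  VecStashSetInitialSize_Private(stash, 1000); /* comparable to setting -vecstash_initial_size 1000 */
  /* ... later, typically when the vector is destroyed: VecStashDestroy_Private(stash); */
  return 0;
}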

 71: /*
 72:    VecStashDestroy_Private - Destroy the stash
 73: */
 74: PetscErrorCode VecStashDestroy_Private(VecStash *stash)
 75: {
 76:   PetscFree2(stash->array, stash->idx);
 77:   PetscFree(stash->bowners);
 78:   return 0;
 79: }

 81: /*
 82:    VecStashScatterEnd_Private - This is called as the final stage of the
 83:    scatter. The final stage of message passing is done here, and all the
 84:    memory used for message passing is cleaned up. This routine also resets
 85:    the stash, and deallocates the memory used for the stash. It also keeps
 86:    track of the current memory usage so that the same value can be used the
 87:    next time through.
 88: */
 89: PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
 90: {
 91:   PetscInt    nsends = stash->nsends, oldnmax;
 92:   MPI_Status *send_status;

 94:   /* wait on sends */
 95:   if (nsends) {
 96:     PetscMalloc1(2 * nsends, &send_status);
 97:     MPI_Waitall(2 * nsends, stash->send_waits, send_status);
 98:     PetscFree(send_status);
 99:   }

101:   /* Now update oldnmax to be approximately 10% more than the maximum n, so that
102:      the wastage of space is reduced the next time this stash is used.
103:      Only update oldnmax if it increases */
104:   if (stash->n) {
105:     oldnmax = ((PetscInt)(stash->n * 1.1) + 5) * stash->bs;
106:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
107:   }

109:   stash->nmax       = 0;
110:   stash->n          = 0;
111:   stash->reallocs   = -1;
112:   stash->rmax       = 0;
113:   stash->nprocessed = 0;

115:   PetscFree2(stash->array, stash->idx);
116:   stash->array = NULL;
117:   stash->idx   = NULL;
118:   PetscFree(stash->send_waits);
119:   PetscFree(stash->recv_waits);
120:   PetscFree2(stash->svalues, stash->sindices);
121:   PetscFree2(stash->rvalues, stash->rindices);
122:   PetscFree(stash->nprocs);
123:   return 0;
124: }

126: /*
127:    VecStashGetInfo_Private - Gets the relevant statistics of the stash

129:    Input Parameter:   stash - the stash
130:    Output Parameters:
131:    nstash   - the size of the stash (number of stashed values times the block size)
132:    reallocs - the number of additional mallocs incurred.

134: */
135: PetscErrorCode VecStashGetInfo_Private(VecStash *stash, PetscInt *nstash, PetscInt *reallocs)
136: {
137:   if (nstash) *nstash = stash->n * stash->bs;
138:   if (reallocs) {
139:     if (stash->reallocs < 0) *reallocs = 0;
140:     else *reallocs = stash->reallocs;
141:   }
142:   return 0;
143: }
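
/*
   Illustrative sketch (not part of vecstash.c): querying the stash statistics,
   e.g. for a diagnostic message during assembly. The function name is
   hypothetical.
*/
static PetscErrorCode ExampleReportStash_Sketch(VecStash *stash)
{
  PetscInt nstash, reallocs;

  VecStashGetInfo_Private(stash, &nstash, &reallocs);
  PetscPrintf(stash->comm, "Stash has %d entries, used %d mallocs.\n", (int)nstash, (int)reallocs);
  return 0;
}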

145: /*
146:    VecStashSetInitialSize_Private - Sets the initial size of the stash

148:    Input Parameters:
149:    stash  - the stash
150:    max    - the value used as the maximum size of the stash.
151:             This value is used when allocating memory. It specifies
152:             the number of values stored, even for the block-stash.
153: */
154: PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash, PetscInt max)
155: {
156:   stash->umax = max;
157:   return 0;
158: }

160: /* VecStashExpand_Private - Expand the stash. This function is called
161:    when the space in the stash is not sufficient to add the new values
162:    being inserted into the stash.

164:    Input Parameters:
165:    stash - the stash
166:    incr  - the minimum increase requested

168:    Notes:
169:    This routine doubles the currently used memory.
170: */
171: PetscErrorCode VecStashExpand_Private(VecStash *stash, PetscInt incr)
172: {
173:   PetscInt    *n_idx, newnmax, bs = stash->bs;
174:   PetscScalar *n_array;

176:   /* allocate a larger stash. */
177:   if (!stash->oldnmax && !stash->nmax) { /* new stash */
178:     if (stash->umax) newnmax = stash->umax / bs;
179:     else newnmax = DEFAULT_STASH_SIZE / bs;
180:   } else if (!stash->nmax) { /* reusing stash */
181:     if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs;
182:     else newnmax = stash->oldnmax / bs;
183:   } else newnmax = stash->nmax * 2;

185:   if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;

187:   PetscMalloc2(bs * newnmax, &n_array, newnmax, &n_idx);
188:   PetscMemcpy(n_array, stash->array, bs * stash->nmax * sizeof(PetscScalar));
189:   PetscMemcpy(n_idx, stash->idx, stash->nmax * sizeof(PetscInt));
190:   PetscFree2(stash->array, stash->idx);

192:   stash->array = n_array;
193:   stash->idx   = n_idx;
194:   stash->nmax  = newnmax;
195:   stash->reallocs++;
196:   return 0;
197: }
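
/*
   Illustrative sketch (not part of vecstash.c): how a single off-process value
   is appended to a scalar (bs == 1) stash, growing it on demand; the stashing
   macros in petsc/private/vecimpl.h follow essentially this pattern. The
   function name is hypothetical.
*/
static PetscErrorCode ExampleStashValue_Sketch(VecStash *stash, PetscInt row, PetscScalar value)
{
  if (stash->n == stash->nmax) VecStashExpand_Private(stash, 1); /* grow when full */
  stash->idx[stash->n]   = row;
  stash->array[stash->n] = value;
  stash->n++;
  return 0;
}
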
198: /*
199:   VecStashScatterBegin_Private - Initiates the transfer of values to the
200:   correct owners. This function goes through the stash, checks the
201:   owner of each stashed value, and sends the values off to the owning
202:   processors.

204:   Input Parameters:
205:   stash  - the stash
206:   owners - an array of size 'no-of-procs'+1 which gives the ownership range
207:            for each processor.

209:   Notes:
210:     In the case of the blocked stash, the 'owners' array gives the
211:   ownership ranges in blocked global indices; for the regular stash the
212:   ranges are in regular global indices.
213: */
214: PetscErrorCode VecStashScatterBegin_Private(VecStash *stash, PetscInt *owners)
215: {
216:   PetscMPIInt  size = stash->size, tag1 = stash->tag1, tag2 = stash->tag2;
217:   PetscInt    *owner, *start, *nprocs, nsends, nreceives;
218:   PetscInt     nmax, count, *sindices, *rindices, i, j, idx, bs = stash->bs, lastidx;
219:   PetscScalar *rvalues, *svalues;
220:   MPI_Comm     comm = stash->comm;
221:   MPI_Request *send_waits, *recv_waits;

223:   /*  first count number of contributors to each processor */
224:   PetscCalloc1(2 * size, &nprocs);
225:   PetscMalloc1(stash->n, &owner);

227:   j       = 0;
228:   lastidx = -1;
229:   for (i = 0; i < stash->n; i++) {
230:     /* if indices are NOT locally sorted, need to start search at the beginning */
231:     if (lastidx > (idx = stash->idx[i])) j = 0;
232:     lastidx = idx;
233:     for (; j < size; j++) {
234:       if (idx >= owners[j] && idx < owners[j + 1]) {
235:         nprocs[2 * j]++;
236:         nprocs[2 * j + 1] = 1;
237:         owner[i]          = j;
238:         break;
239:       }
240:     }
241:   }
242:   nsends = 0;
243:   for (i = 0; i < size; i++) nsends += nprocs[2 * i + 1];

245:   /* inform other processors of number of messages and max length*/
246:   PetscMaxSum(comm, nprocs, &nmax, &nreceives);

248:   /* post receives:
249:      since we don't know how long each individual message is we
250:      allocate the largest needed buffer for each receive. Potentially
251:      this is a lot of wasted space.
252:   */
253:   PetscMalloc2(nreceives * nmax * bs, &rvalues, nreceives * nmax, &rindices);
254:   PetscMalloc1(2 * nreceives, &recv_waits);
255:   for (i = 0, count = 0; i < nreceives; i++) {
256:     MPI_Irecv(rvalues + bs * nmax * i, bs * nmax, MPIU_SCALAR, MPI_ANY_SOURCE, tag1, comm, recv_waits + count++);
257:     MPI_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag2, comm, recv_waits + count++);
258:   }

260:   /* do sends:
261:       1) start[i] gives the starting index in svalues for stuff going to
262:          the ith processor
263:   */
264:   PetscMalloc2(stash->n * bs, &svalues, stash->n, &sindices);
265:   PetscMalloc1(2 * nsends, &send_waits);
266:   PetscMalloc1(size, &start);
267:   /* use 2 sends per processor: the first with the values (svalues), the second with the indices (sindices) */
268:   start[0] = 0;
269:   for (i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];

271:   for (i = 0; i < stash->n; i++) {
272:     j = owner[i];
273:     if (bs == 1) svalues[start[j]] = stash->array[i];
274:     else PetscMemcpy(svalues + bs * start[j], stash->array + bs * i, bs * sizeof(PetscScalar));
275:     sindices[start[j]] = stash->idx[i];
276:     start[j]++;
277:   }
278:   start[0] = 0;
279:   for (i = 1; i < size; i++) start[i] = start[i - 1] + nprocs[2 * i - 2];

281:   for (i = 0, count = 0; i < size; i++) {
282:     if (nprocs[2 * i + 1]) {
283:       MPI_Isend(svalues + bs * start[i], bs * nprocs[2 * i], MPIU_SCALAR, i, tag1, comm, send_waits + count++);
284:       MPI_Isend(sindices + start[i], nprocs[2 * i], MPIU_INT, i, tag2, comm, send_waits + count++);
285:     }
286:   }
287:   PetscFree(owner);
288:   PetscFree(start);
289:   /* This memory is reused in VecStashScatterGetMesg_Private() for a different purpose */
290:   for (i = 0; i < 2 * size; i++) nprocs[i] = -1;

292:   stash->nprocs     = nprocs;
293:   stash->svalues    = svalues;
294:   stash->sindices   = sindices;
295:   stash->rvalues    = rvalues;
296:   stash->rindices   = rindices;
297:   stash->nsends     = nsends;
298:   stash->nrecvs     = nreceives;
299:   stash->send_waits = send_waits;
300:   stash->recv_waits = recv_waits;
301:   stash->rmax       = nmax;
302:   return 0;
303: }
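
/*
   Illustrative sketch (not part of vecstash.c): the 'owners' argument is
   typically the ownership-range array of the vector's PetscLayout, so that
   owners[j] <= idx < owners[j+1] exactly when rank j owns global index idx.
   A block stash would instead be given these ranges divided by the block
   size. The function name is hypothetical.
*/
static PetscErrorCode ExampleBeginScatter_Sketch(VecStash *stash, PetscLayout map)
{
  VecStashScatterBegin_Private(stash, map->range); /* posts the sends and receives described above */
  return 0;
}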

305: /*
306:    VecStashScatterGetMesg_Private - This function waits on the receives posted
307:    in the function VecStashScatterBegin_Private() and returns one message at
308:    a time to the calling function. If no messages are left, it indicates this
309:    by setting flg = 0, else it sets flg = 1.

311:    Input Parameters:
312:    stash - the stash

314:    Output Parameters:
315:    nvals - the number of entries in the current message.
316:    rows  - an array of indices (or blocked indices) corresponding to the values
318:    vals  - the values
319:    flg   - 0 indicates no more messages are left, and the current call has no values associated.
320:            1 indicates that the current call successfully received a message, and the
321:              other output parameters nvals, rows, vals are set appropriately.
322: */
323: PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscScalar **vals, PetscInt *flg)
324: {
325:   PetscMPIInt i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
326:   PetscInt   *flg_v;
327:   PetscInt    i1, i2, bs = stash->bs;
328:   MPI_Status  recv_status;
329:   PetscBool   match_found = PETSC_FALSE;

331:   *flg = 0; /* When a message is discovered this is reset to 1 */
332:   /* Return if no more messages to process */
333:   if (stash->nprocessed == stash->nrecvs) return 0;

335:   flg_v = stash->nprocs;
336:   /* If a matching pair of receives is found, process it, and return the data to
337:      the calling function. Until then keep receiving messages */
338:   while (!match_found) {
339:     MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status);
340:     /* Now pack the received message into a structure which is usable by others */
341:     if (i % 2) {
342:       MPI_Get_count(&recv_status, MPIU_INT, nvals);
343:       flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
344:     } else {
345:       MPI_Get_count(&recv_status, MPIU_SCALAR, nvals);
346:       flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
347:       *nvals                            = *nvals / bs;
348:     }

350:     /* Check if we have both the messages from this proc */
351:     i1 = flg_v[2 * recv_status.MPI_SOURCE];
352:     i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
353:     if (i1 != -1 && i2 != -1) {
354:       *rows = stash->rindices + i2 * stash->rmax;
355:       *vals = stash->rvalues + i1 * bs * stash->rmax;
356:       *flg  = 1;
357:       stash->nprocessed++;
358:       match_found = PETSC_TRUE;
359:     }
360:   }
361:   return 0;
362: }
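
/*
   Illustrative sketch (not part of vecstash.c): the receive loop an assembly
   routine would run after VecStashScatterBegin_Private() (see the sketch
   above). Each successful call returns one matched (indices, values) message;
   flg == 0 means every message has been processed. The function name, the
   local array 'xarray', and the 'base' offset are hypothetical; ADD_VALUES
   and bs == 1 are assumed.
*/
static PetscErrorCode ExampleProcessMessages_Sketch(VecStash *stash, PetscScalar *xarray, PetscInt base)
{
  PetscMPIInt  nvals;
  PetscInt    *rows, flg, i;
  PetscScalar *vals;

  while (1) {
    VecStashScatterGetMesg_Private(stash, &nvals, &rows, &vals, &flg);
    if (!flg) break;                                               /* no messages left */
    for (i = 0; i < nvals; i++) xarray[rows[i] - base] += vals[i]; /* add each received value into the owned part */
  }
  VecStashScatterEnd_Private(stash); /* wait on the sends and free the communication buffers */
  return 0;
}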

364: /*
365:  * Sort the stash, removing duplicates (combining as appropriate).
366:  */
367: PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
368: {
369:   PetscInt i, j, bs = stash->bs;

371:   if (!stash->n) return 0;
372:   if (bs == 1) {
373:     PetscSortIntWithScalarArray(stash->n, stash->idx, stash->array);
374:     for (i = 1, j = 0; i < stash->n; i++) {
375:       if (stash->idx[i] == stash->idx[j]) {
376:         switch (stash->insertmode) {
377:         case ADD_VALUES:
378:           stash->array[j] += stash->array[i];
379:           break;
380:         case INSERT_VALUES:
381:           stash->array[j] = stash->array[i];
382:           break;
383:         default:
384:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
385:         }
386:       } else {
387:         j++;
388:         stash->idx[j]   = stash->idx[i];
389:         stash->array[j] = stash->array[i];
390:       }
391:     }
392:     stash->n = j + 1;
393:   } else { /* block stash */
394:     PetscInt    *perm = NULL;
395:     PetscScalar *arr;
396:     PetscMalloc2(stash->n, &perm, stash->n * bs, &arr);
397:     for (i = 0; i < stash->n; i++) perm[i] = i;
398:     PetscSortIntWithArray(stash->n, stash->idx, perm);

400:     /* Out-of-place copy of arr */
401:     PetscMemcpy(arr, stash->array + perm[0] * bs, bs * sizeof(PetscScalar));
402:     for (i = 1, j = 0; i < stash->n; i++) {
403:       PetscInt k;
404:       if (stash->idx[i] == stash->idx[j]) {
405:         switch (stash->insertmode) {
406:         case ADD_VALUES:
407:           for (k = 0; k < bs; k++) arr[j * bs + k] += stash->array[perm[i] * bs + k];
408:           break;
409:         case INSERT_VALUES:
410:           for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
411:           break;
412:         default:
413:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Insert mode not supported 0x%x", stash->insertmode);
414:         }
415:       } else {
416:         j++;
417:         stash->idx[j] = stash->idx[i];
418:         for (k = 0; k < bs; k++) arr[j * bs + k] = stash->array[perm[i] * bs + k];
419:       }
420:     }
421:     stash->n = j + 1;
422:     PetscMemcpy(stash->array, arr, stash->n * bs * sizeof(PetscScalar));
423:     PetscFree2(perm, arr);
424:   }
425:   return 0;
426: }
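
/*
   Illustrative sketch (not part of vecstash.c): effect of the sort/compress
   step on a scalar stash. Assumes 'stash' was created by
   VecStashCreate_Private() with bs = 1 and currently holds no entries; the
   function name and the index/value data are hypothetical.
*/
static PetscErrorCode ExampleSortCompress_Sketch(VecStash *stash)
{
  stash->insertmode = ADD_VALUES;
  VecStashExpand_Private(stash, 3); /* make room for three entries */
  stash->idx[0] = 5; stash->array[0] = 1.0;
  stash->idx[1] = 2; stash->array[1] = 4.0;
  stash->idx[2] = 5; stash->array[2] = 2.0;
  stash->n = 3;
  VecStashSortCompress_Private(stash);
  /* stash->n is now 2, with idx = {2, 5} and array = {4.0, 3.0}: the two
     values stashed for index 5 have been combined by addition */
  return 0;
}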

428: PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash, PetscLayout map, PetscMPIInt *nowners, PetscMPIInt **owners)
429: {
430:   PetscInt       i, bs = stash->bs;
431:   PetscMPIInt    r;
432:   PetscSegBuffer seg;

435:   PetscSegBufferCreate(sizeof(PetscMPIInt), 50, &seg);
436:   *nowners = 0;
437:   for (i = 0, r = -1; i < stash->n; i++) {
438:     if (stash->idx[i] * bs >= map->range[r + 1]) {
439:       PetscMPIInt *rank;
440:       PetscSegBufferGet(seg, 1, &rank);
441:       PetscLayoutFindOwner(map, stash->idx[i] * bs, &r);
442:       *rank = r;
443:       (*nowners)++;
444:     }
445:   }
446:   PetscSegBufferExtractAlloc(seg, owners);
447:   PetscSegBufferDestroy(&seg);
448:   return 0;
449: }
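
/*
   Illustrative sketch (not part of vecstash.c): VecStashGetOwnerList_Private()
   above assumes the stashed indices are already sorted (for example by
   VecStashSortCompress_Private()) and returns the distinct owning ranks in
   increasing order in an array the caller frees. The function name here is
   hypothetical.
*/
static PetscErrorCode ExampleOwnerList_Sketch(VecStash *stash, PetscLayout map)
{
  PetscMPIInt nowners, *owners;

  VecStashSortCompress_Private(stash);
  VecStashGetOwnerList_Private(stash, map, &nowners, &owners);
  /* ... communicate with the nowners destination ranks listed in owners[] ... */
  PetscFree(owners);
  return 0;
}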