Actual source code: comb.c


  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations:
 17:          - The order of the xxxEnd() functions MUST be in the same order
 18:            as the xxxBegin(). There is extensive error checking to try to
 19:            insure that the user calls the routines in the correct order
 20: */

 22: #include <petsc/private/vecimpl.h>

 24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
 25: {
 26: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 27:   MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request);
 28: #else
 29:   MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
 30:   *request = MPI_REQUEST_NULL;
 31: #endif
 32:   return 0;
 33: }

 35: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);

 37: /*
 38:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 39: */
 40: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
 41: {
 42:   PetscNew(sr);
 43:   (*sr)->numopsbegin = 0;
 44:   (*sr)->numopsend   = 0;
 45:   (*sr)->state       = STATE_BEGIN;
 46: #define MAXOPS 32
 47:   (*sr)->maxops = MAXOPS;
 48:   PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix);
 49: #undef MAXOPS
 50:   (*sr)->comm    = comm;
 51:   (*sr)->request = MPI_REQUEST_NULL;
 52:   (*sr)->mix     = PETSC_FALSE;
 53:   (*sr)->async   = PETSC_FALSE;
 54: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 55:   (*sr)->async = PETSC_TRUE; /* Enable by default */
 56: #endif
 57:   /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
 58:   PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL);
 59:   return 0;
 60: }

 62: /*
 63:        This function is the MPI reduction operation used when there is
 64:    a combination of sums and max in the reduction. The call below to
 65:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 66:    MPI operator PetscSplitReduction_Op.
 67: */
 68: MPI_Op PetscSplitReduction_Op = 0;

 70: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
 71: {
 72:   struct PetscScalarInt {
 73:     PetscScalar v;
 74:     PetscInt    i;
 75:   };
 76:   struct PetscScalarInt *xin  = (struct PetscScalarInt *)in;
 77:   struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
 78:   PetscInt               i, count = (PetscInt)*cnt;

 80:   if (*datatype != MPIU_SCALAR_INT) {
 81:     (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types");
 82:     PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 83:   }
 84:   for (i = 0; i < count; i++) {
 85:     if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
 86:     else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
 87:     else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
 88:     else {
 89:       (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN");
 90:       PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 91:     }
 92:   }
 93:   return;
 94: }

 96: /*@
 97:    PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

 99:    Collective but not synchronizing

101:    Input Parameter:
102:    comm - communicator on which split reduction has been queued

104:    Level: advanced

106:    Note:
107:    Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
108:    VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

110: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
111: @*/
112: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
113: {
114:   PetscSplitReduction *sr;

116:   PetscSplitReductionGet(comm, &sr);
118:   if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
119:     PetscInt     i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
120:     PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
121:     PetscInt     sum_flg = 0, max_flg = 0, min_flg = 0;
122:     MPI_Comm     comm = sr->comm;
123:     PetscMPIInt  size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);

125:     PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0);
126:     MPI_Comm_size(sr->comm, &size);
127:     if (size == 1) {
128:       PetscArraycpy(gvalues, lvalues, numops);
129:     } else {
130:       /* determine if all reductions are sum, max, or min */
131:       for (i = 0; i < numops; i++) {
132:         if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
133:         else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
134:         else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
135:         else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
136:       }
138:       if (sum_flg + max_flg + min_flg > 1) {
139:         sr->mix = PETSC_TRUE;
140:         for (i = 0; i < numops; i++) {
141:           sr->lvalues_mix[i].v = lvalues[i];
142:           sr->lvalues_mix[i].i = reducetype[i];
143:         }
144:         MPIPetsc_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request);
145:       } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
146:         MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request);
147:       } else if (min_flg) {
148:         MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request);
149:       } else {
150:         MPIPetsc_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request);
151:       }
152:     }
153:     sr->state     = STATE_PENDING;
154:     sr->numopsend = 0;
155:     PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0);
156:   } else {
157:     PetscSplitReductionApply(sr);
158:   }
159:   return 0;
160: }

162: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
163: {
164:   switch (sr->state) {
165:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
166:     PetscSplitReductionApply(sr);
167:     break;
168:   case STATE_PENDING:
169:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
170:     PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0);
171:     if (sr->request != MPI_REQUEST_NULL) MPI_Wait(&sr->request, MPI_STATUS_IGNORE);
172:     sr->state = STATE_END;
173:     if (sr->mix) {
174:       PetscInt i;
175:       for (i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
176:       sr->mix = PETSC_FALSE;
177:     }
178:     PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0);
179:     break;
180:   default:
181:     break; /* everything is already done */
182:   }
183:   return 0;
184: }

186: /*
187:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
188: */
189: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
190: {
191:   PetscInt     i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
192:   PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
193:   PetscInt     sum_flg = 0, max_flg = 0, min_flg = 0;
194:   MPI_Comm     comm = sr->comm;
195:   PetscMPIInt  size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);

198:   PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0);
199:   MPI_Comm_size(sr->comm, &size);
200:   if (size == 1) {
201:     PetscArraycpy(gvalues, lvalues, numops);
202:   } else {
203:     /* determine if all reductions are sum, max, or min */
204:     for (i = 0; i < numops; i++) {
205:       if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
206:       else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
207:       else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
208:       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
209:     }
210:     if (sum_flg + max_flg + min_flg > 1) {
212:       for (i = 0; i < numops; i++) {
213:         sr->lvalues_mix[i].v = lvalues[i];
214:         sr->lvalues_mix[i].i = reducetype[i];
215:       }
216:       MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm);
217:       for (i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
218:     } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
219:       MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm);
220:     } else if (min_flg) {
221:       MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm);
222:     } else {
223:       MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm);
224:     }
225:   }
226:   sr->state     = STATE_END;
227:   sr->numopsend = 0;
228:   PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0);
229:   return 0;
230: }

232: /*
233:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
234: */
235: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
236: {
237:   struct PetscScalarInt {
238:     PetscScalar v;
239:     PetscInt    i;
240:   };
241:   PetscInt               maxops = sr->maxops, *reducetype = sr->reducetype;
242:   PetscScalar           *lvalues = sr->lvalues, *gvalues = sr->gvalues;
243:   struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
244:   struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
245:   void                 **invecs      = sr->invecs;

247:   sr->maxops = 2 * maxops;
248:   PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix);
249:   PetscArraycpy(sr->lvalues, lvalues, maxops);
250:   PetscArraycpy(sr->gvalues, gvalues, maxops);
251:   PetscArraycpy(sr->reducetype, reducetype, maxops);
252:   PetscArraycpy(sr->invecs, invecs, maxops);
253:   PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops);
254:   PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops);
255:   PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix);
256:   return 0;
257: }

259: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
260: {
261:   PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix);
262:   PetscFree(sr);
263:   return 0;
264: }

266: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

268: /*
269:    Private routine to delete internal storage when a communicator is freed.
270:   This is called by MPI, not by users.

272:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
273:   it was MPI_Comm *comm.
274: */
275: PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
276: {
277:   PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm);
278:   PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
279:   return 0;
280: }

282: /*
283:      PetscSplitReductionGet - Gets the split reduction object from a
284:         PETSc vector, creates if it does not exit.

286: */
287: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
288: {
289:   PetscMPIInt flag;

291:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
292:     /*
293:        The calling sequence of the 2nd argument to this function changed
294:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
295:        new standard, if you are using an MPI implementation that uses
296:        the older version you will get a warning message about the next line;
297:        it is only a warning message and should do no harm.
298:     */
299:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL);
300:   }
301:   MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &flag);
302:   if (!flag) { /* doesn't exist yet so create it and put it in */
303:     PetscSplitReductionCreate(comm, sr);
304:     MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr);
305:     PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm);
306:   }
307:   return 0;
308: }

310: /* ----------------------------------------------------------------------------------------------------*/

312: /*@
313:    VecDotBegin - Starts a split phase dot product computation.

315:    Input Parameters:
316: +   x - the first vector
317: .   y - the second vector
318: -   result - where the result will go (can be NULL)

320:    Level: advanced

322:    Notes:
323:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

325: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
326:          VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
327: @*/
328: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
329: {
330:   PetscSplitReduction *sr;
331:   MPI_Comm             comm;

335:   PetscObjectGetComm((PetscObject)x, &comm);
336:   PetscSplitReductionGet(comm, &sr);
338:   if (sr->numopsbegin >= sr->maxops) PetscSplitReductionExtend(sr);
339:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
340:   sr->invecs[sr->numopsbegin]     = (void *)x;
341:   PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
342:   PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
343:   PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
344:   return 0;
345: }

347: /*@
348:    VecDotEnd - Ends a split phase dot product computation.

350:    Input Parameters:
351: +  x - the first vector (can be NULL)
352: .  y - the second vector (can be NULL)
353: -  result - where the result will go

355:    Level: advanced

357:    Notes:
358:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

360: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
361:           `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`

363: @*/
364: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
365: {
366:   PetscSplitReduction *sr;
367:   MPI_Comm             comm;

369:   PetscObjectGetComm((PetscObject)x, &comm);
370:   PetscSplitReductionGet(comm, &sr);
371:   PetscSplitReductionEnd(sr);

376:   *result = sr->gvalues[sr->numopsend++];

378:   /*
379:      We are finished getting all the results so reset to no outstanding requests
380:   */
381:   if (sr->numopsend == sr->numopsbegin) {
382:     sr->state       = STATE_BEGIN;
383:     sr->numopsend   = 0;
384:     sr->numopsbegin = 0;
385:     sr->mix         = PETSC_FALSE;
386:   }
387:   return 0;
388: }

390: /*@
391:    VecTDotBegin - Starts a split phase transpose dot product computation.

393:    Input Parameters:
394: +  x - the first vector
395: .  y - the second vector
396: -  result - where the result will go (can be NULL)

398:    Level: advanced

400:    Notes:
401:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

403: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
404:           `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`

406: @*/
407: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
408: {
409:   PetscSplitReduction *sr;
410:   MPI_Comm             comm;

412:   PetscObjectGetComm((PetscObject)x, &comm);
413:   PetscSplitReductionGet(comm, &sr);
415:   if (sr->numopsbegin >= sr->maxops) PetscSplitReductionExtend(sr);
416:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
417:   sr->invecs[sr->numopsbegin]     = (void *)x;
418:   PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
419:   PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
420:   PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
421:   return 0;
422: }

424: /*@
425:    VecTDotEnd - Ends a split phase transpose dot product computation.

427:    Input Parameters:
428: +  x - the first vector (can be NULL)
429: .  y - the second vector (can be NULL)
430: -  result - where the result will go

432:    Level: advanced

434:    Notes:
435:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

437: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
438:          VecDotBegin(), VecDotEnd()
439: @*/
440: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
441: {
442:   /*
443:       TDotEnd() is the same as DotEnd() so reuse the code
444:   */
445:   VecDotEnd(x, y, result);
446:   return 0;
447: }

449: /* -------------------------------------------------------------------------*/

451: /*@
452:    VecNormBegin - Starts a split phase norm computation.

454:    Input Parameters:
455: +  x - the first vector
456: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
457: -  result - where the result will go (can be NULL)

459:    Level: advanced

461:    Notes:
462:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

464: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`

466: @*/
467: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
468: {
469:   PetscSplitReduction *sr;
470:   PetscReal            lresult[2];
471:   MPI_Comm             comm;

474:   PetscObjectGetComm((PetscObject)x, &comm);
475:   PetscSplitReductionGet(comm, &sr);
477:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscSplitReductionExtend(sr);

479:   sr->invecs[sr->numopsbegin] = (void *)x;
480:   PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
481:   PetscUseTypeMethod(x, norm_local, ntype, lresult);
482:   PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
483:   if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
484:   if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
485:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
486:   else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
487:   sr->lvalues[sr->numopsbegin++] = lresult[0];
488:   if (ntype == NORM_1_AND_2) {
489:     sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
490:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
491:   }
492:   return 0;
493: }

495: /*@
496:    VecNormEnd - Ends a split phase norm computation.

498:    Input Parameters:
499: +  x - the first vector
500: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
501: -  result - where the result will go

503:    Level: advanced

505:    Notes:
506:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

508:    The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value

510: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`

512: @*/
513: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
514: {
515:   PetscSplitReduction *sr;
516:   MPI_Comm             comm;

519:   PetscObjectGetComm((PetscObject)x, &comm);
520:   PetscSplitReductionGet(comm, &sr);
521:   PetscSplitReductionEnd(sr);

526:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

528:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
529:   else if (ntype == NORM_1_AND_2) {
530:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
531:     result[1] = PetscSqrtReal(result[1]);
532:   }
533:   if (ntype != NORM_1_AND_2) PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]);

535:   if (sr->numopsend == sr->numopsbegin) {
536:     sr->state       = STATE_BEGIN;
537:     sr->numopsend   = 0;
538:     sr->numopsbegin = 0;
539:   }
540:   return 0;
541: }

543: /*
544:    Possibly add

546:      PetscReductionSumBegin/End()
547:      PetscReductionMaxBegin/End()
548:      PetscReductionMinBegin/End()
549:    or have more like MPI with a single function with flag for Op? Like first better
550: */

552: /*@
553:    VecMDotBegin - Starts a split phase multiple dot product computation.

555:    Input Parameters:
556: +   x - the first vector
557: .   nv - number of vectors
558: .   y - array of vectors
559: -   result - where the result will go (can be NULL)

561:    Level: advanced

563:    Notes:
564:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

566: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
567:           `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
568: @*/
569: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
570: {
571:   PetscSplitReduction *sr;
572:   MPI_Comm             comm;
573:   PetscInt             i;

575:   PetscObjectGetComm((PetscObject)x, &comm);
576:   PetscSplitReductionGet(comm, &sr);
578:   for (i = 0; i < nv; i++) {
579:     if (sr->numopsbegin + i >= sr->maxops) PetscSplitReductionExtend(sr);
580:     sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
581:     sr->invecs[sr->numopsbegin + i]     = (void *)x;
582:   }
583:   PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
584:   PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
585:   PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
586:   sr->numopsbegin += nv;
587:   return 0;
588: }

590: /*@
591:    VecMDotEnd - Ends a split phase multiple dot product computation.

593:    Input Parameters:
594: +   x - the first vector (can be NULL)
595: .   nv - number of vectors
596: -   y - array of vectors (can be NULL)

598:    Output Parameters:
599: .   result - where the result will go

601:    Level: advanced

603:    Notes:
604:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

606: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
607:           `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`

609: @*/
610: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
611: {
612:   PetscSplitReduction *sr;
613:   MPI_Comm             comm;
614:   PetscInt             i;

616:   PetscObjectGetComm((PetscObject)x, &comm);
617:   PetscSplitReductionGet(comm, &sr);
618:   PetscSplitReductionEnd(sr);

623:   for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];

625:   /*
626:      We are finished getting all the results so reset to no outstanding requests
627:   */
628:   if (sr->numopsend == sr->numopsbegin) {
629:     sr->state       = STATE_BEGIN;
630:     sr->numopsend   = 0;
631:     sr->numopsbegin = 0;
632:   }
633:   return 0;
634: }

636: /*@
637:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

639:    Input Parameters:
640: +  x - the first vector
641: .  nv - number of vectors
642: .  y - array of  vectors
643: -  result - where the result will go (can be NULL)

645:    Level: advanced

647:    Notes:
648:    Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().

650: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
651:           `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`

653: @*/
654: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
655: {
656:   PetscSplitReduction *sr;
657:   MPI_Comm             comm;
658:   PetscInt             i;

660:   PetscObjectGetComm((PetscObject)x, &comm);
661:   PetscSplitReductionGet(comm, &sr);
663:   for (i = 0; i < nv; i++) {
664:     if (sr->numopsbegin + i >= sr->maxops) PetscSplitReductionExtend(sr);
665:     sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
666:     sr->invecs[sr->numopsbegin + i]     = (void *)x;
667:   }
668:   PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
669:   PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
670:   PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
671:   sr->numopsbegin += nv;
672:   return 0;
673: }

675: /*@
676:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

678:    Input Parameters:
679: +  x - the first vector (can be NULL)
680: .  nv - number of vectors
681: -  y - array of  vectors (can be NULL)

683:    Output Parameters:
684: .  result - where the result will go

686:    Level: advanced

688:    Notes:
689:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

691: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
692:           `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
693: @*/
694: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
695: {
696:   /*
697:       MTDotEnd() is the same as MDotEnd() so reuse the code
698:   */
699:   VecMDotEnd(x, nv, y, result);
700:   return 0;
701: }