Actual source code: comb.c
2: /*
3: Split phase global vector reductions with support for combining the
4: communication portion of several operations. Using MPI-1.1 support only
6: The idea for this and much of the initial code is contributed by
7: Victor Eijkhout.
9: Usage:
10: VecDotBegin(Vec,Vec,PetscScalar *);
11: VecNormBegin(Vec,NormType,PetscReal *);
12: ....
13: VecDotEnd(Vec,Vec,PetscScalar *);
14: VecNormEnd(Vec,NormType,PetscReal *);
16: Limitations:
17: - The order of the xxxEnd() functions MUST be in the same order
18: as the xxxBegin(). There is extensive error checking to try to
19: insure that the user calls the routines in the correct order
20: */
22: #include <petsc/private/vecimpl.h>
24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
25: {
26: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
27: MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request);
28: #else
29: MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
30: *request = MPI_REQUEST_NULL;
31: #endif
32: return 0;
33: }
35: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);
37: /*
38: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
39: */
40: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
41: {
42: PetscNew(sr);
43: (*sr)->numopsbegin = 0;
44: (*sr)->numopsend = 0;
45: (*sr)->state = STATE_BEGIN;
46: #define MAXOPS 32
47: (*sr)->maxops = MAXOPS;
48: PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix);
49: #undef MAXOPS
50: (*sr)->comm = comm;
51: (*sr)->request = MPI_REQUEST_NULL;
52: (*sr)->mix = PETSC_FALSE;
53: (*sr)->async = PETSC_FALSE;
54: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
55: (*sr)->async = PETSC_TRUE; /* Enable by default */
56: #endif
57: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
58: PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL);
59: return 0;
60: }
62: /*
63: This function is the MPI reduction operation used when there is
64: a combination of sums and max in the reduction. The call below to
65: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
66: MPI operator PetscSplitReduction_Op.
67: */
68: MPI_Op PetscSplitReduction_Op = 0;
70: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
71: {
72: struct PetscScalarInt {
73: PetscScalar v;
74: PetscInt i;
75: };
76: struct PetscScalarInt *xin = (struct PetscScalarInt *)in;
77: struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
78: PetscInt i, count = (PetscInt)*cnt;
80: if (*datatype != MPIU_SCALAR_INT) {
81: (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types");
82: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
83: }
84: for (i = 0; i < count; i++) {
85: if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
86: else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
87: else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
88: else {
89: (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN");
90: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
91: }
92: }
93: return;
94: }
96: /*@
97: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
99: Collective but not synchronizing
101: Input Parameter:
102: comm - communicator on which split reduction has been queued
104: Level: advanced
106: Note:
107: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
108: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
110: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
111: @*/
112: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
113: {
114: PetscSplitReduction *sr;
116: PetscSplitReductionGet(comm, &sr);
118: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
119: PetscInt i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
120: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
121: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
122: MPI_Comm comm = sr->comm;
123: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
125: PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0);
126: MPI_Comm_size(sr->comm, &size);
127: if (size == 1) {
128: PetscArraycpy(gvalues, lvalues, numops);
129: } else {
130: /* determine if all reductions are sum, max, or min */
131: for (i = 0; i < numops; i++) {
132: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
133: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
134: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
135: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
136: }
138: if (sum_flg + max_flg + min_flg > 1) {
139: sr->mix = PETSC_TRUE;
140: for (i = 0; i < numops; i++) {
141: sr->lvalues_mix[i].v = lvalues[i];
142: sr->lvalues_mix[i].i = reducetype[i];
143: }
144: MPIPetsc_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request);
145: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
146: MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request);
147: } else if (min_flg) {
148: MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request);
149: } else {
150: MPIPetsc_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request);
151: }
152: }
153: sr->state = STATE_PENDING;
154: sr->numopsend = 0;
155: PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0);
156: } else {
157: PetscSplitReductionApply(sr);
158: }
159: return 0;
160: }
162: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
163: {
164: switch (sr->state) {
165: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
166: PetscSplitReductionApply(sr);
167: break;
168: case STATE_PENDING:
169: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
170: PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0);
171: if (sr->request != MPI_REQUEST_NULL) MPI_Wait(&sr->request, MPI_STATUS_IGNORE);
172: sr->state = STATE_END;
173: if (sr->mix) {
174: PetscInt i;
175: for (i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
176: sr->mix = PETSC_FALSE;
177: }
178: PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0);
179: break;
180: default:
181: break; /* everything is already done */
182: }
183: return 0;
184: }
186: /*
187: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
188: */
189: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
190: {
191: PetscInt i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
192: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
193: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
194: MPI_Comm comm = sr->comm;
195: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
198: PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0);
199: MPI_Comm_size(sr->comm, &size);
200: if (size == 1) {
201: PetscArraycpy(gvalues, lvalues, numops);
202: } else {
203: /* determine if all reductions are sum, max, or min */
204: for (i = 0; i < numops; i++) {
205: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
206: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
207: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
208: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
209: }
210: if (sum_flg + max_flg + min_flg > 1) {
212: for (i = 0; i < numops; i++) {
213: sr->lvalues_mix[i].v = lvalues[i];
214: sr->lvalues_mix[i].i = reducetype[i];
215: }
216: MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm);
217: for (i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
218: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
219: MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm);
220: } else if (min_flg) {
221: MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm);
222: } else {
223: MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm);
224: }
225: }
226: sr->state = STATE_END;
227: sr->numopsend = 0;
228: PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0);
229: return 0;
230: }
232: /*
233: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
234: */
235: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
236: {
237: struct PetscScalarInt {
238: PetscScalar v;
239: PetscInt i;
240: };
241: PetscInt maxops = sr->maxops, *reducetype = sr->reducetype;
242: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
243: struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
244: struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
245: void **invecs = sr->invecs;
247: sr->maxops = 2 * maxops;
248: PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix);
249: PetscArraycpy(sr->lvalues, lvalues, maxops);
250: PetscArraycpy(sr->gvalues, gvalues, maxops);
251: PetscArraycpy(sr->reducetype, reducetype, maxops);
252: PetscArraycpy(sr->invecs, invecs, maxops);
253: PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops);
254: PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops);
255: PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix);
256: return 0;
257: }
259: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
260: {
261: PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix);
262: PetscFree(sr);
263: return 0;
264: }
266: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
268: /*
269: Private routine to delete internal storage when a communicator is freed.
270: This is called by MPI, not by users.
272: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
273: it was MPI_Comm *comm.
274: */
275: PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
276: {
277: PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm);
278: PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
279: return 0;
280: }
282: /*
283: PetscSplitReductionGet - Gets the split reduction object from a
284: PETSc vector, creates if it does not exit.
286: */
287: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
288: {
289: PetscMPIInt flag;
291: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
292: /*
293: The calling sequence of the 2nd argument to this function changed
294: between MPI Standard 1.0 and the revisions 1.1 Here we match the
295: new standard, if you are using an MPI implementation that uses
296: the older version you will get a warning message about the next line;
297: it is only a warning message and should do no harm.
298: */
299: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL);
300: }
301: MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &flag);
302: if (!flag) { /* doesn't exist yet so create it and put it in */
303: PetscSplitReductionCreate(comm, sr);
304: MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr);
305: PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm);
306: }
307: return 0;
308: }
310: /* ----------------------------------------------------------------------------------------------------*/
312: /*@
313: VecDotBegin - Starts a split phase dot product computation.
315: Input Parameters:
316: + x - the first vector
317: . y - the second vector
318: - result - where the result will go (can be NULL)
320: Level: advanced
322: Notes:
323: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
325: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
326: VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
327: @*/
328: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
329: {
330: PetscSplitReduction *sr;
331: MPI_Comm comm;
335: PetscObjectGetComm((PetscObject)x, &comm);
336: PetscSplitReductionGet(comm, &sr);
338: if (sr->numopsbegin >= sr->maxops) PetscSplitReductionExtend(sr);
339: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
340: sr->invecs[sr->numopsbegin] = (void *)x;
341: PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
342: PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
343: PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
344: return 0;
345: }
347: /*@
348: VecDotEnd - Ends a split phase dot product computation.
350: Input Parameters:
351: + x - the first vector (can be NULL)
352: . y - the second vector (can be NULL)
353: - result - where the result will go
355: Level: advanced
357: Notes:
358: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
360: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
361: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
363: @*/
364: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
365: {
366: PetscSplitReduction *sr;
367: MPI_Comm comm;
369: PetscObjectGetComm((PetscObject)x, &comm);
370: PetscSplitReductionGet(comm, &sr);
371: PetscSplitReductionEnd(sr);
376: *result = sr->gvalues[sr->numopsend++];
378: /*
379: We are finished getting all the results so reset to no outstanding requests
380: */
381: if (sr->numopsend == sr->numopsbegin) {
382: sr->state = STATE_BEGIN;
383: sr->numopsend = 0;
384: sr->numopsbegin = 0;
385: sr->mix = PETSC_FALSE;
386: }
387: return 0;
388: }
390: /*@
391: VecTDotBegin - Starts a split phase transpose dot product computation.
393: Input Parameters:
394: + x - the first vector
395: . y - the second vector
396: - result - where the result will go (can be NULL)
398: Level: advanced
400: Notes:
401: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
403: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
404: `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
406: @*/
407: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
408: {
409: PetscSplitReduction *sr;
410: MPI_Comm comm;
412: PetscObjectGetComm((PetscObject)x, &comm);
413: PetscSplitReductionGet(comm, &sr);
415: if (sr->numopsbegin >= sr->maxops) PetscSplitReductionExtend(sr);
416: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
417: sr->invecs[sr->numopsbegin] = (void *)x;
418: PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
419: PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
420: PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
421: return 0;
422: }
424: /*@
425: VecTDotEnd - Ends a split phase transpose dot product computation.
427: Input Parameters:
428: + x - the first vector (can be NULL)
429: . y - the second vector (can be NULL)
430: - result - where the result will go
432: Level: advanced
434: Notes:
435: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
437: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
438: VecDotBegin(), VecDotEnd()
439: @*/
440: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
441: {
442: /*
443: TDotEnd() is the same as DotEnd() so reuse the code
444: */
445: VecDotEnd(x, y, result);
446: return 0;
447: }
449: /* -------------------------------------------------------------------------*/
451: /*@
452: VecNormBegin - Starts a split phase norm computation.
454: Input Parameters:
455: + x - the first vector
456: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
457: - result - where the result will go (can be NULL)
459: Level: advanced
461: Notes:
462: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
464: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
466: @*/
467: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
468: {
469: PetscSplitReduction *sr;
470: PetscReal lresult[2];
471: MPI_Comm comm;
474: PetscObjectGetComm((PetscObject)x, &comm);
475: PetscSplitReductionGet(comm, &sr);
477: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscSplitReductionExtend(sr);
479: sr->invecs[sr->numopsbegin] = (void *)x;
480: PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
481: PetscUseTypeMethod(x, norm_local, ntype, lresult);
482: PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
483: if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
484: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
485: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
486: else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
487: sr->lvalues[sr->numopsbegin++] = lresult[0];
488: if (ntype == NORM_1_AND_2) {
489: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
490: sr->lvalues[sr->numopsbegin++] = lresult[1];
491: }
492: return 0;
493: }
495: /*@
496: VecNormEnd - Ends a split phase norm computation.
498: Input Parameters:
499: + x - the first vector
500: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
501: - result - where the result will go
503: Level: advanced
505: Notes:
506: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
508: The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value
510: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
512: @*/
513: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
514: {
515: PetscSplitReduction *sr;
516: MPI_Comm comm;
519: PetscObjectGetComm((PetscObject)x, &comm);
520: PetscSplitReductionGet(comm, &sr);
521: PetscSplitReductionEnd(sr);
526: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
528: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
529: else if (ntype == NORM_1_AND_2) {
530: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
531: result[1] = PetscSqrtReal(result[1]);
532: }
533: if (ntype != NORM_1_AND_2) PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]);
535: if (sr->numopsend == sr->numopsbegin) {
536: sr->state = STATE_BEGIN;
537: sr->numopsend = 0;
538: sr->numopsbegin = 0;
539: }
540: return 0;
541: }
543: /*
544: Possibly add
546: PetscReductionSumBegin/End()
547: PetscReductionMaxBegin/End()
548: PetscReductionMinBegin/End()
549: or have more like MPI with a single function with flag for Op? Like first better
550: */
552: /*@
553: VecMDotBegin - Starts a split phase multiple dot product computation.
555: Input Parameters:
556: + x - the first vector
557: . nv - number of vectors
558: . y - array of vectors
559: - result - where the result will go (can be NULL)
561: Level: advanced
563: Notes:
564: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
566: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
567: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
568: @*/
569: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
570: {
571: PetscSplitReduction *sr;
572: MPI_Comm comm;
573: PetscInt i;
575: PetscObjectGetComm((PetscObject)x, &comm);
576: PetscSplitReductionGet(comm, &sr);
578: for (i = 0; i < nv; i++) {
579: if (sr->numopsbegin + i >= sr->maxops) PetscSplitReductionExtend(sr);
580: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
581: sr->invecs[sr->numopsbegin + i] = (void *)x;
582: }
583: PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
584: PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
585: PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
586: sr->numopsbegin += nv;
587: return 0;
588: }
590: /*@
591: VecMDotEnd - Ends a split phase multiple dot product computation.
593: Input Parameters:
594: + x - the first vector (can be NULL)
595: . nv - number of vectors
596: - y - array of vectors (can be NULL)
598: Output Parameters:
599: . result - where the result will go
601: Level: advanced
603: Notes:
604: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
606: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
607: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
609: @*/
610: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
611: {
612: PetscSplitReduction *sr;
613: MPI_Comm comm;
614: PetscInt i;
616: PetscObjectGetComm((PetscObject)x, &comm);
617: PetscSplitReductionGet(comm, &sr);
618: PetscSplitReductionEnd(sr);
623: for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];
625: /*
626: We are finished getting all the results so reset to no outstanding requests
627: */
628: if (sr->numopsend == sr->numopsbegin) {
629: sr->state = STATE_BEGIN;
630: sr->numopsend = 0;
631: sr->numopsbegin = 0;
632: }
633: return 0;
634: }
636: /*@
637: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
639: Input Parameters:
640: + x - the first vector
641: . nv - number of vectors
642: . y - array of vectors
643: - result - where the result will go (can be NULL)
645: Level: advanced
647: Notes:
648: Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().
650: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
651: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
653: @*/
654: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
655: {
656: PetscSplitReduction *sr;
657: MPI_Comm comm;
658: PetscInt i;
660: PetscObjectGetComm((PetscObject)x, &comm);
661: PetscSplitReductionGet(comm, &sr);
663: for (i = 0; i < nv; i++) {
664: if (sr->numopsbegin + i >= sr->maxops) PetscSplitReductionExtend(sr);
665: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
666: sr->invecs[sr->numopsbegin + i] = (void *)x;
667: }
668: PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
669: PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
670: PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
671: sr->numopsbegin += nv;
672: return 0;
673: }
675: /*@
676: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
678: Input Parameters:
679: + x - the first vector (can be NULL)
680: . nv - number of vectors
681: - y - array of vectors (can be NULL)
683: Output Parameters:
684: . result - where the result will go
686: Level: advanced
688: Notes:
689: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
691: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
692: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
693: @*/
694: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
695: {
696: /*
697: MTDotEnd() is the same as MDotEnd() so reuse the code
698: */
699: VecMDotEnd(x, nv, y, result);
700: return 0;
701: }