/*
  This file (mpikok.kokkos.cxx) contains routines for parallel vector operations.
*/
#include <petscvec_kokkos.hpp>
#include <petsc/private/deviceimpl.h>
#include <petsc/private/vecimpl.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
#include <petscsf.h>
PetscErrorCode VecDestroy_MPIKokkos(Vec v)
{
  Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(v->spptr);

  delete veckok;
  VecDestroy_MPI(v);
  return 0;
}
PetscErrorCode VecNorm_MPIKokkos(Vec xin, NormType type, PetscReal *z)
{
  PetscReal sum, work = 0.0;

  if (type == NORM_2 || type == NORM_FROBENIUS) {
    VecNorm_SeqKokkos(xin, NORM_2, &work);
    work *= work;
    MPIU_Allreduce(&work, &sum, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin));
    *z = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_SeqKokkos(xin, NORM_1, &work);
    /* Find the global sum */
    MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_SeqKokkos(xin, NORM_INFINITY, &work);
    /* Find the global max */
    MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_SeqKokkos(xin, NORM_1, temp);
    VecNorm_SeqKokkos(xin, NORM_2, temp + 1);
    temp[1] = temp[1] * temp[1];
    MPIU_Allreduce(temp, z, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin));
    z[1] = PetscSqrtReal(z[1]);
  }
  return 0;
}
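/*
  How the global norms above are assembled from the per-rank results of the SeqKokkos
  kernels (p ranges over MPI ranks, x_p is the local part of x):

    ||x||_2   = sqrt( sum_p ||x_p||_2^2 )    -> MPIU_SUM over the squared local 2-norms
    ||x||_1   = sum_p ||x_p||_1              -> MPIU_SUM
    ||x||_inf = max_p ||x_p||_inf            -> MPIU_MAX

  NORM_1_AND_2 packs the local 1-norm and the squared local 2-norm into one buffer so a
  single MPIU_Allreduce of two reals suffices.
*/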
/* z = y^H x */
PetscErrorCode VecDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
{
  PetscScalar sum, work;

  VecDot_SeqKokkos(xin, yin, &work);
  MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  *z = sum;
  return 0;
}

PetscErrorCode VecMDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscScalar awork[128], *work = awork;

  if (nv > 128) PetscMalloc1(nv, &work);
  VecMDot_SeqKokkos(xin, nv, y, work);
  MPIU_Allreduce(work, z, nv, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  if (nv > 128) PetscFree(work);
  return 0;
}
/* z = y^T x */
PetscErrorCode VecTDot_MPIKokkos(Vec xin, Vec yin, PetscScalar *z)
{
  PetscScalar sum, work;

  VecTDot_SeqKokkos(xin, yin, &work);
  MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  *z = sum;
  return 0;
}

PetscErrorCode VecMTDot_MPIKokkos(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscScalar awork[128], *work = awork;

  if (nv > 128) PetscMalloc1(nv, &work);
  VecMTDot_SeqKokkos(xin, nv, y, work);
  MPIU_Allreduce(work, z, nv, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  if (nv > 128) PetscFree(work);
  return 0;
}
PetscErrorCode VecMax_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
{
  PetscReal work;

  /* Find the local max */
  VecMax_SeqKokkos(xin, idx, &work);
#if defined(PETSC_HAVE_MPIUNI)
  *z = work;
#else
  /* Find the global max */
  if (!idx) { /* User does not need idx */
    MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)xin));
  } else {
    struct {
      PetscReal v;
      PetscInt  i;
    } in, out;

    in.v = work;
    in.i = *idx + xin->map->rstart;
    MPIU_Allreduce(&in, &out, 1, MPIU_REAL_INT, MPIU_MAXLOC, PetscObjectComm((PetscObject)xin));
    *z   = out.v;
    *idx = out.i;
  }
#endif
  return 0;
}
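/*
  When the caller also wants the location, the (value, global index) pair is combined across
  ranks with MPIU_MAXLOC, which keeps the maximum value and the index attached to it (ties
  resolved in favor of the smaller index), following standard MPI_MAXLOC semantics.
  VecMin_MPIKokkos below does the same with MPIU_MINLOC.
*/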
PetscErrorCode VecMin_MPIKokkos(Vec xin, PetscInt *idx, PetscReal *z)
{
  PetscReal work;

  /* Find the local Min */
  VecMin_SeqKokkos(xin, idx, &work);
#if defined(PETSC_HAVE_MPIUNI)
  *z = work;
#else
  /* Find the global Min */
  if (!idx) {
    MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)xin));
  } else {
    struct {
      PetscReal v;
      PetscInt  i;
    } in, out;

    in.v = work;
    in.i = *idx + xin->map->rstart;
    MPIU_Allreduce(&in, &out, 1, MPIU_REAL_INT, MPIU_MINLOC, PetscObjectComm((PetscObject)xin));
    *z   = out.v;
    *idx = out.i;
  }
#endif
  return 0;
}
PetscErrorCode VecDuplicate_MPIKokkos(Vec win, Vec *vv)
{
  Vec         v;
  Vec_MPI    *vecmpi;
  Vec_Kokkos *veckok;

  /* Reuse VecDuplicate_MPI, which contains a lot of stuff */
  VecDuplicate_MPI(win, &v); /* after the call, v is a VECMPI, with data zeroed */
  PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS);
  PetscMemcpy(v->ops, win->ops, sizeof(struct _VecOps));

  /* Build the Vec_Kokkos struct */
  vecmpi = static_cast<Vec_MPI *>(v->data);
  veckok = new Vec_Kokkos(v->map->n, vecmpi->array);
  Kokkos::deep_copy(veckok->v_dual.view_device(), 0.0);
  v->spptr       = veckok;
  v->offloadmask = PETSC_OFFLOAD_KOKKOS;
  *vv            = v;
  return 0;
}
PetscErrorCode VecDotNorm2_MPIKokkos(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
{
  PetscScalar work[2], sum[2];

  VecDotNorm2_SeqKokkos(s, t, work, work + 1);
  MPIU_Allreduce(&work, &sum, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)s));
  *dp = sum[0];
  *nm = sum[1];
  return 0;
}

static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x, IS is, Vec *y)
{
  VecGetSubVector_Kokkos_Private(x, PETSC_TRUE, is, y);
  return 0;
}
static PetscErrorCode VecSetPreallocationCOO_MPIKokkos(Vec x, PetscCount ncoo, const PetscInt coo_i[])
{
  Vec_MPI    *vecmpi = static_cast<Vec_MPI *>(x->data);
  Vec_Kokkos *veckok = static_cast<Vec_Kokkos *>(x->spptr);
  PetscInt    m;

  VecGetLocalSize(x, &m);
  VecSetPreallocationCOO_MPI(x, ncoo, coo_i);
  veckok->SetUpCOO(vecmpi, m);
  return 0;
}
static PetscErrorCode VecSetValuesCOO_MPIKokkos(Vec x, const PetscScalar v[], InsertMode imode)
{
  Vec_MPI                    *vecmpi  = static_cast<Vec_MPI *>(x->data);
  Vec_Kokkos                 *veckok  = static_cast<Vec_Kokkos *>(x->spptr);
  const PetscCountKokkosView &jmap1   = veckok->jmap1_d;
  const PetscCountKokkosView &perm1   = veckok->perm1_d;
  const PetscCountKokkosView &imap2   = veckok->imap2_d;
  const PetscCountKokkosView &jmap2   = veckok->jmap2_d;
  const PetscCountKokkosView &perm2   = veckok->perm2_d;
  const PetscCountKokkosView &Cperm   = veckok->Cperm_d;
  PetscScalarKokkosView      &sendbuf = veckok->sendbuf_d;
  PetscScalarKokkosView      &recvbuf = veckok->recvbuf_d;
  PetscScalarKokkosView       xv;
  ConstPetscScalarKokkosView  vv;
  PetscMemType                memtype;
  PetscInt                    m;

  VecGetLocalSize(x, &m);
  PetscGetMemType(v, &memtype);
  if (PetscMemTypeHost(memtype)) { /* If the user gave v[] on the host, copy it to the device when the default memory space lives there */
    vv = Kokkos::create_mirror_view_and_copy(DefaultMemorySpace(), ConstPetscScalarKokkosViewHost(v, vecmpi->coo_n));
  } else {
    vv = ConstPetscScalarKokkosView(v, vecmpi->coo_n); /* Directly use v[]'s memory */
  }

  /* Pack entries to be sent to remote ranks */
  Kokkos::parallel_for(
    vecmpi->sendlen, KOKKOS_LAMBDA(const PetscCount i) { sendbuf(i) = vv(Cperm(i)); });
  PetscSFReduceWithMemTypeBegin(vecmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_KOKKOS, sendbuf.data(), PETSC_MEMTYPE_KOKKOS, recvbuf.data(), MPI_REPLACE);

  if (imode == INSERT_VALUES) VecGetKokkosViewWrite(x, &xv); /* write-only access to the vector */
  else VecGetKokkosView(x, &xv);                             /* read & write access to the vector */

  /* Add the locally owned entries */
  Kokkos::parallel_for(
    m, KOKKOS_LAMBDA(const PetscCount i) {
      PetscScalar sum = 0.0;
      for (PetscCount k = jmap1(i); k < jmap1(i + 1); k++) sum += vv(perm1(k));
      xv(i) = (imode == INSERT_VALUES ? 0.0 : xv(i)) + sum;
    });

  PetscSFReduceEnd(vecmpi->coo_sf, MPIU_SCALAR, sendbuf.data(), recvbuf.data(), MPI_REPLACE);

  /* Add received remote entries */
  Kokkos::parallel_for(
    vecmpi->nnz2, KOKKOS_LAMBDA(PetscCount i) {
      for (PetscCount k = jmap2(i); k < jmap2(i + 1); k++) xv(imap2(i)) += recvbuf(perm2(k));
    });

  if (imode == INSERT_VALUES) VecRestoreKokkosViewWrite(x, &xv);
  else VecRestoreKokkosView(x, &xv);
  return 0;
}
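/*
  Example usage of the COO assembly path implemented above (a minimal sketch; error checking
  is omitted and the index and value data are made up for illustration):

    PetscInt    coo_i[] = {0, 7, 7, 12};        // global row of each contribution; repeated
                                                 // and off-process rows are allowed
    PetscScalar coo_v[] = {1.0, 2.0, 3.0, 4.0};  // the corresponding values

    VecSetPreallocationCOO(x, 4, coo_i);         // builds jmap1/perm1/imap2/jmap2/perm2/Cperm and coo_sf
    VecSetValuesCOO(x, coo_v, ADD_VALUES);       // repeated rows are summed; remote rows are shipped
                                                 // with the PetscSF and added by the owning rank

  VecSetValuesCOO() may be called repeatedly with new values; the preallocation maps are reused.
*/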
static PetscErrorCode VecSetOps_MPIKokkos(Vec v)
{
  v->ops->abs             = VecAbs_SeqKokkos;
  v->ops->reciprocal      = VecReciprocal_SeqKokkos;
  v->ops->pointwisemult   = VecPointwiseMult_SeqKokkos;
  v->ops->setrandom       = VecSetRandom_SeqKokkos;
  v->ops->dotnorm2        = VecDotNorm2_MPIKokkos;
  v->ops->waxpy           = VecWAXPY_SeqKokkos;
  v->ops->norm            = VecNorm_MPIKokkos;
  v->ops->min             = VecMin_MPIKokkos;
  v->ops->max             = VecMax_MPIKokkos;
  v->ops->sum             = VecSum_SeqKokkos;
  v->ops->shift           = VecShift_SeqKokkos;
  v->ops->scale           = VecScale_SeqKokkos;
  v->ops->copy            = VecCopy_SeqKokkos;
  v->ops->set             = VecSet_SeqKokkos;
  v->ops->swap            = VecSwap_SeqKokkos;
  v->ops->axpy            = VecAXPY_SeqKokkos;
  v->ops->axpby           = VecAXPBY_SeqKokkos;
  v->ops->maxpy           = VecMAXPY_SeqKokkos;
  v->ops->aypx            = VecAYPX_SeqKokkos;
  v->ops->axpbypcz        = VecAXPBYPCZ_SeqKokkos;
  v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
  v->ops->placearray      = VecPlaceArray_SeqKokkos;
  v->ops->replacearray    = VecReplaceArray_SeqKokkos;
  v->ops->resetarray      = VecResetArray_SeqKokkos;

  v->ops->dot   = VecDot_MPIKokkos;
  v->ops->tdot  = VecTDot_MPIKokkos;
  v->ops->mdot  = VecMDot_MPIKokkos;
  v->ops->mtdot = VecMTDot_MPIKokkos;

  v->ops->dot_local   = VecDot_SeqKokkos;
  v->ops->tdot_local  = VecTDot_SeqKokkos;
  v->ops->mdot_local  = VecMDot_SeqKokkos;
  v->ops->mtdot_local = VecMTDot_SeqKokkos;

  v->ops->norm_local              = VecNorm_SeqKokkos;
  v->ops->duplicate               = VecDuplicate_MPIKokkos;
  v->ops->destroy                 = VecDestroy_MPIKokkos;
  v->ops->getlocalvector          = VecGetLocalVector_SeqKokkos;
  v->ops->restorelocalvector      = VecRestoreLocalVector_SeqKokkos;
  v->ops->getlocalvectorread      = VecGetLocalVector_SeqKokkos;
  v->ops->restorelocalvectorread  = VecRestoreLocalVector_SeqKokkos;
  v->ops->getarraywrite           = VecGetArrayWrite_SeqKokkos;
  v->ops->getarray                = VecGetArray_SeqKokkos;
  v->ops->restorearray            = VecRestoreArray_SeqKokkos;
  v->ops->getarrayandmemtype      = VecGetArrayAndMemType_SeqKokkos;
  v->ops->restorearrayandmemtype  = VecRestoreArrayAndMemType_SeqKokkos;
  v->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqKokkos;
  v->ops->getsubvector            = VecGetSubVector_MPIKokkos;
  v->ops->restoresubvector        = VecRestoreSubVector_SeqKokkos;

  v->ops->setpreallocationcoo = VecSetPreallocationCOO_MPIKokkos;
  v->ops->setvaluescoo        = VecSetValuesCOO_MPIKokkos;
  return 0;
}
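/*
  Design note on the ops table above: purely local operations (axpy, scale, pointwise ops,
  array access, ...) reuse the SeqKokkos kernels directly, whereas operations that need a
  global result (dot, mdot, norm, min, max, dotnorm2) go through the thin _MPIKokkos wrappers
  defined earlier in this file, which run the SeqKokkos kernel and then an MPI reduction.
  The *_local entries expose the reduction-free local parts; split-phase interfaces such as
  VecDotBegin()/VecDotEnd() and VecNormBegin()/VecNormEnd() use them so that several
  reductions can be batched into a single MPI_Allreduce.
*/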
/*MC
   VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos

   Options Database Keys:
. -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()

   Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/
PetscErrorCode VecCreate_MPIKokkos(Vec v)
{
  Vec_MPI    *vecmpi;
  Vec_Kokkos *veckok;

  PetscKokkosInitializeCheck();
  PetscLayoutSetUp(v->map);
  VecCreate_MPI(v); /* Calloc host array */

  vecmpi = static_cast<Vec_MPI *>(v->data);
  PetscObjectChangeTypeName((PetscObject)v, VECMPIKOKKOS);
  VecSetOps_MPIKokkos(v);
  veckok         = new Vec_Kokkos(v->map->n, vecmpi->array, NULL); /* Alloc device array but do not init it */
  v->spptr       = static_cast<void *>(veckok);
  v->offloadmask = PETSC_OFFLOAD_KOKKOS;
  return 0;
}
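/*
  Example: creating a VECMPIKOKKOS vector programmatically (a minimal sketch; error checking
  and cleanup are abbreviated, and the global size 100 is arbitrary):

    Vec x;
    VecCreate(PETSC_COMM_WORLD, &x);
    VecSetSizes(x, PETSC_DECIDE, 100);
    VecSetType(x, VECMPIKOKKOS);   // or call VecSetFromOptions(x) and pass -vec_type mpikokkos
    // ... use x ...
    VecDestroy(&x);
*/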
/*@C
   VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
   where the user provides the GPU array space to store the vector values.

   Collective

   Input Parameters:
+  comm   - the MPI communicator to use
.  bs     - block size, same meaning as VecSetBlockSize()
.  n      - local vector length, cannot be PETSC_DECIDE
.  N      - global vector length (or PETSC_DECIDE to have it calculated)
-  darray - the user-provided GPU array to store the vector values

   Output Parameter:
.  v - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
   at a later stage to SET the array for storing the vector values.

   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
   The user should not free the array until the vector is destroyed.

   Level: intermediate

.seealso: `VecCreateSeqKokkosWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
          `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
          `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
@*/
PetscErrorCode VecCreateMPIKokkosWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar darray[], Vec *v)
{
  Vec          w;
  Vec_Kokkos  *veckok;
  Vec_MPI     *vecmpi;
  PetscScalar *harray;

  PetscKokkosInitializeCheck();
  PetscSplitOwnership(comm, &n, &N);
  VecCreate(comm, &w);
  VecSetSizes(w, n, N);
  VecSetBlockSize(w, bs);
  PetscLayoutSetUp(w->map);

  if (std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) {
    harray = const_cast<PetscScalar *>(darray);
  } else PetscMalloc1(w->map->n, &harray); /* If device is not the same as host, allocate the host array ourselves */

  VecCreate_MPI_Private(w, PETSC_FALSE /*alloc*/, 0 /*nghost*/, harray); /* Build an MPI vector with the provided host data */
  vecmpi = static_cast<Vec_MPI *>(w->data);

  if (!std::is_same<DefaultMemorySpace, Kokkos::HostSpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by PETSc */

  PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS);
  VecSetOps_MPIKokkos(w);
  veckok = new Vec_Kokkos(n, harray, const_cast<PetscScalar *>(darray));
  veckok->v_dual.modify_device(); /* Mark the device data as modified */
  w->spptr       = static_cast<void *>(veckok);
  w->offloadmask = PETSC_OFFLOAD_KOKKOS;
  *v             = w;
  return 0;
}
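/*
  Example: wrapping an existing device allocation (a minimal sketch assuming the default
  Kokkos memory space is a device space; the View name, the local length n, and bs = 1 are
  illustrative choices only):

    PetscInt                                        n = 100;
    Kokkos::View<PetscScalar *, DefaultMemorySpace> d("d", n);   // user-owned device array
    Vec                                             x;

    VecCreateMPIKokkosWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, d.data(), &x);
    // ... use x ...
    VecDestroy(&x);   // does NOT free d; the View must outlive the vector
*/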
/*
   VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
   with user-provided arrays on host and device.

   Collective

   Input Parameters:
+  comm   - the communicator
.  bs     - the block size
.  n      - the local vector length
.  N      - the global vector length
.  harray - host memory where the vector elements are to be stored
-  darray - device memory where the vector elements are to be stored

   Output Parameter:
.  v - the vector

   Notes:
   If there is no device, then harray and darray must be the same.
   If n is not zero, then harray and darray must be allocated.
   After the call, the created vector is supposed to be in a synchronized state, i.e.,
   we suppose harray and darray have the same data.

   PETSc does NOT free the arrays when the vector is destroyed via VecDestroy().
   The user should not free the arrays until the vector is destroyed.
*/
PetscErrorCode VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar harray[], const PetscScalar darray[], Vec *v)
{
  Vec w;

  PetscKokkosInitializeCheck();
  if (n) {
    PetscCheck(harray, comm, PETSC_ERR_ARG_WRONG, "harray cannot be NULL");
    PetscCheck(darray, comm, PETSC_ERR_ARG_WRONG, "darray cannot be NULL");
  }

  VecCreateMPIWithArray(comm, bs, n, N, harray, &w);
  PetscObjectChangeTypeName((PetscObject)w, VECMPIKOKKOS); /* Change it to a Kokkos vector */
  VecSetOps_MPIKokkos(w);
  w->spptr       = new Vec_Kokkos(n, const_cast<PetscScalar *>(harray), const_cast<PetscScalar *>(darray));
  w->offloadmask = PETSC_OFFLOAD_KOKKOS;
  *v             = w;
  return 0;
}
/*MC
   VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos

   Options Database Keys:
. -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()

   Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIKokkosWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/
PetscErrorCode VecCreate_Kokkos(Vec v)
{
  PetscMPIInt size;

  MPI_Comm_size(PetscObjectComm((PetscObject)v), &size);
  if (size == 1) VecSetType(v, VECSEQKOKKOS);
  else VecSetType(v, VECMPIKOKKOS);
  return 0;
}
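/*
  Example: letting the vector type be chosen at run time (a minimal sketch; error checking
  omitted, global size arbitrary):

    Vec x;
    VecCreate(PETSC_COMM_WORLD, &x);
    VecSetSizes(x, PETSC_DECIDE, 100);
    VecSetFromOptions(x);   // run with -vec_type kokkos
    // On one MPI rank this dispatches to VECSEQKOKKOS, on several ranks to VECMPIKOKKOS,
    // exactly as VecCreate_Kokkos() above does.
*/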