Actual source code: mpiviennacl.cxx
/*
   This file contains routines for Parallel vector operations.
 */
#include <petscconf.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
/*MC
   VECVIENNACL - VECVIENNACL = "viennacl" - A VECSEQVIENNACL on a single-process communicator, and VECMPIVIENNACL otherwise.

   Options Database Keys:
.  -vec_type viennacl - sets the vector type to VECVIENNACL during a call to VecSetFromOptions()

   Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQVIENNACL`, `VECMPIVIENNACL`, `VECSTANDARD`, `VecType`, `VecCreateMPI()`
M*/
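
/*
   A minimal usage sketch for the VECVIENNACL type described above (illustrative only;
   the communicator and length are arbitrary choices, not mandated by the implementation):

     Vec      x;
     PetscInt N = 100;

     VecCreate(PETSC_COMM_WORLD, &x);
     VecSetSizes(x, PETSC_DECIDE, N);
     VecSetType(x, VECVIENNACL);   // equivalently: VecSetFromOptions(x) with -vec_type viennacl
     VecSet(x, 1.0);
     VecDestroy(&x);
*/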
PetscErrorCode VecDestroy_MPIViennaCL(Vec v)
{
  try {
    if (v->spptr) {
      delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
      delete (Vec_ViennaCL *)v->spptr;
    }
  } catch (std::exception const &ex) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
  }
  VecDestroy_MPI(v);
  return 0;
}
PetscErrorCode VecNorm_MPIViennaCL(Vec xin, NormType type, PetscReal *z)
{
  PetscReal sum, work = 0.0;

  if (type == NORM_2 || type == NORM_FROBENIUS) {
    VecNorm_SeqViennaCL(xin, NORM_2, &work);
    work *= work;
    MPIU_Allreduce(&work, &sum, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin));
    *z = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_SeqViennaCL(xin, NORM_1, &work);
    /* Find the global sum */
    MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_SeqViennaCL(xin, NORM_INFINITY, &work);
    /* Find the global max */
    MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)xin));
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_SeqViennaCL(xin, NORM_1, temp);
    VecNorm_SeqViennaCL(xin, NORM_2, temp + 1);
    temp[1] = temp[1] * temp[1];
    MPIU_Allreduce(temp, z, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin));
    z[1] = PetscSqrtReal(z[1]);
  }
  return 0;
}
PetscErrorCode VecDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
{
  PetscScalar sum, work;

  VecDot_SeqViennaCL(xin, yin, &work);
  MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  *z = sum;
  return 0;
}

PetscErrorCode VecTDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
{
  PetscScalar sum, work;

  VecTDot_SeqViennaCL(xin, yin, &work);
  MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  *z = sum;
  return 0;
}

PetscErrorCode VecMDot_MPIViennaCL(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscScalar awork[128], *work = awork;

  if (nv > 128) PetscMalloc1(nv, &work);
  VecMDot_SeqViennaCL(xin, nv, y, work);
  MPIU_Allreduce(work, z, nv, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin));
  if (nv > 128) PetscFree(work);
  return 0;
}
/*MC
   VECMPIVIENNACL - VECMPIVIENNACL = "mpiviennacl" - The basic parallel vector, modified to use ViennaCL

   Options Database Keys:
.  -vec_type mpiviennacl - sets the vector type to VECMPIVIENNACL during a call to VecSetFromOptions()

   Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/
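
/*
   A companion sketch selecting VECMPIVIENNACL explicitly with a prescribed local size
   (the sizes are illustrative; on a single-process communicator VECSEQVIENNACL would
   normally be used instead):

     Vec      x;
     PetscInt nlocal = 10;

     VecCreate(PETSC_COMM_WORLD, &x);
     VecSetSizes(x, nlocal, PETSC_DETERMINE);
     VecSetType(x, VECMPIVIENNACL);
     VecDestroy(&x);
*/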
PetscErrorCode VecDuplicate_MPIViennaCL(Vec win, Vec *v)
{
  Vec_MPI     *vw, *w = (Vec_MPI *)win->data;
  PetscScalar *array;

  VecCreate(PetscObjectComm((PetscObject)win), v);
  PetscLayoutReference(win->map, &(*v)->map);

  VecCreate_MPI_Private(*v, PETSC_FALSE, w->nghost, 0);
  vw = (Vec_MPI *)(*v)->data;
  PetscMemcpy((*v)->ops, win->ops, sizeof(struct _VecOps));

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    VecGetArray(*v, &array);
    VecCreateSeqWithArray(PETSC_COMM_SELF, 1, win->map->n + w->nghost, array, &vw->localrep);
    PetscMemcpy(vw->localrep->ops, w->localrep->ops, sizeof(struct _VecOps));
    VecRestoreArray(*v, &array);
    vw->localupdate = w->localupdate;
    if (vw->localupdate) PetscObjectReference((PetscObject)vw->localupdate);
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  /* change type_name appropriately */
  PetscObjectChangeTypeName((PetscObject)(*v), VECMPIVIENNACL);

  PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)(*v))->olist);
  PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)(*v))->qlist);
  (*v)->map->bs = PetscAbs(win->map->bs);
  (*v)->bstash.bs = win->bstash.bs;
  return 0;
}

PetscErrorCode VecDotNorm2_MPIViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
{
  PetscScalar work[2], sum[2];

  VecDotNorm2_SeqViennaCL(s, t, work, work + 1);
  MPIU_Allreduce((void *)&work, (void *)&sum, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)s));
  *dp = sum[0];
  *nm = sum[1];
  return 0;
}
PetscErrorCode VecBindToCPU_MPIViennaCL(Vec vv, PetscBool bind)
{
  vv->boundtocpu = bind;

  if (bind) {
    VecViennaCLCopyFromGPU(vv);
    vv->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
    vv->ops->dotnorm2 = NULL;
    vv->ops->waxpy = VecWAXPY_Seq;
    vv->ops->dot = VecDot_MPI;
    vv->ops->mdot = VecMDot_MPI;
    vv->ops->tdot = VecTDot_MPI;
    vv->ops->norm = VecNorm_MPI;
    vv->ops->scale = VecScale_Seq;
    vv->ops->copy = VecCopy_Seq;
    vv->ops->set = VecSet_Seq;
    vv->ops->swap = VecSwap_Seq;
    vv->ops->axpy = VecAXPY_Seq;
    vv->ops->axpby = VecAXPBY_Seq;
    vv->ops->maxpy = VecMAXPY_Seq;
    vv->ops->aypx = VecAYPX_Seq;
    vv->ops->axpbypcz = VecAXPBYPCZ_Seq;
    vv->ops->pointwisemult = VecPointwiseMult_Seq;
    vv->ops->setrandom = VecSetRandom_Seq;
    vv->ops->placearray = VecPlaceArray_Seq;
    vv->ops->replacearray = VecReplaceArray_Seq;
    vv->ops->resetarray = VecResetArray_Seq;
    vv->ops->dot_local = VecDot_Seq;
    vv->ops->tdot_local = VecTDot_Seq;
    vv->ops->norm_local = VecNorm_Seq;
    vv->ops->mdot_local = VecMDot_Seq;
    vv->ops->pointwisedivide = VecPointwiseDivide_Seq;
    vv->ops->getlocalvector = NULL;
    vv->ops->restorelocalvector = NULL;
    vv->ops->getlocalvectorread = NULL;
    vv->ops->restorelocalvectorread = NULL;
    vv->ops->getarraywrite = NULL;
  } else {
    vv->ops->dotnorm2 = VecDotNorm2_MPIViennaCL;
    vv->ops->waxpy = VecWAXPY_SeqViennaCL;
    vv->ops->duplicate = VecDuplicate_MPIViennaCL;
    vv->ops->dot = VecDot_MPIViennaCL;
    vv->ops->mdot = VecMDot_MPIViennaCL;
    vv->ops->tdot = VecTDot_MPIViennaCL;
    vv->ops->norm = VecNorm_MPIViennaCL;
    vv->ops->scale = VecScale_SeqViennaCL;
    vv->ops->copy = VecCopy_SeqViennaCL;
    vv->ops->set = VecSet_SeqViennaCL;
    vv->ops->swap = VecSwap_SeqViennaCL;
    vv->ops->axpy = VecAXPY_SeqViennaCL;
    vv->ops->axpby = VecAXPBY_SeqViennaCL;
    vv->ops->maxpy = VecMAXPY_SeqViennaCL;
    vv->ops->aypx = VecAYPX_SeqViennaCL;
    vv->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
    vv->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
    vv->ops->setrandom = VecSetRandom_SeqViennaCL;
    vv->ops->dot_local = VecDot_SeqViennaCL;
    vv->ops->tdot_local = VecTDot_SeqViennaCL;
    vv->ops->norm_local = VecNorm_SeqViennaCL;
    vv->ops->mdot_local = VecMDot_SeqViennaCL;
    vv->ops->destroy = VecDestroy_MPIViennaCL;
    vv->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
    vv->ops->placearray = VecPlaceArray_SeqViennaCL;
    vv->ops->replacearray = VecReplaceArray_SeqViennaCL;
    vv->ops->resetarray = VecResetArray_SeqViennaCL;
    vv->ops->getarraywrite = VecGetArrayWrite_SeqViennaCL;
    vv->ops->getarray = VecGetArray_SeqViennaCL;
    vv->ops->restorearray = VecRestoreArray_SeqViennaCL;
  }
  return 0;
}
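
/*
   Usage sketch: applications toggle this through the public VecBindToCPU() interface,
   which dispatches to the bindtocpu method installed in VecCreate_MPIViennaCL_Private()
   below; x stands for any existing VECMPIVIENNACL vector.

     VecBindToCPU(x, PETSC_TRUE);   // subsequent operations on x use the host *_Seq / *_MPI kernels
     VecBindToCPU(x, PETSC_FALSE);  // hand the vector back to the ViennaCL implementations
*/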
PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec vv)
{
  PetscLayoutSetUp(vv->map);
  VecViennaCLAllocateCheck(vv);
  VecCreate_MPIViennaCL_Private(vv, PETSC_FALSE, 0, ((Vec_ViennaCL *)(vv->spptr))->GPUarray);
  VecViennaCLAllocateCheckHost(vv);
  VecSet(vv, 0.0);
  VecSet_Seq(vv, 0.0);
  vv->offloadmask = PETSC_OFFLOAD_BOTH;
  return 0;
}

PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
{
  PetscMPIInt size;

  MPI_Comm_size(PetscObjectComm((PetscObject)v), &size);
  if (size == 1) {
    VecSetType(v, VECSEQVIENNACL);
  } else {
    VecSetType(v, VECMPIVIENNACL);
  }
  return 0;
}
/*@C
   VecCreateMPIViennaCLWithArray - Creates a parallel, array-style vector,
   where the user provides the ViennaCL vector to store the vector values.

   Collective

   Input Parameters:
+  comm  - the MPI communicator to use
.  bs    - block size, same meaning as VecSetBlockSize()
.  n     - local vector length, cannot be PETSC_DECIDE
.  N     - global vector length (or PETSC_DECIDE to have it calculated)
-  array - the user-provided GPU array to store the vector values

   Output Parameter:
.  vv - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
   at a later stage to SET the array for storing the vector values.

   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
   The user should not free the array until the vector is destroyed.

   Level: intermediate

.seealso: `VecCreateSeqViennaCLWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
          `VecCreate()`, `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`

@*/
PetscErrorCode VecCreateMPIViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const ViennaCLVector *array, Vec *vv)
{
  PetscSplitOwnership(comm, &n, &N);
  VecCreate(comm, vv);
  VecSetSizes(*vv, n, N);
  VecSetBlockSize(*vv, bs);
  VecCreate_MPIViennaCL_Private(*vv, PETSC_FALSE, 0, array);
  return 0;
}
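
/*
   A usage sketch for the routine above, assuming ViennaCLVector is the
   viennacl::vector<PetscScalar> type used throughout this file and nlocal is the
   local length owned by this process (names are illustrative):

     PetscInt       nlocal = 10;
     ViennaCLVector gpuvec(nlocal);   // device storage provided by the caller
     Vec            x;

     VecCreateMPIViennaCLWithArray(PETSC_COMM_WORLD, 1, nlocal, PETSC_DECIDE, &gpuvec, &x);
     VecDestroy(&x);                  // gpuvec is not freed by PETSc
*/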
/*@C
   VecCreateMPIViennaCLWithArrays - Creates a parallel, array-style vector,
   where the user provides the CPU array and/or the ViennaCL vector to store the vector values.

   Collective

   Input Parameters:
+  comm        - the MPI communicator to use
.  bs          - block size, same meaning as VecSetBlockSize()
.  n           - local vector length, cannot be PETSC_DECIDE
.  N           - global vector length (or PETSC_DECIDE to have it calculated)
.  cpuarray    - the user-provided CPU array to store the vector values
-  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device

   Output Parameter:
.  vv - the vector

   Notes:
   If both cpuarray and viennaclvec are provided, the caller must ensure that
   the provided arrays have identical values.

   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   PETSc does NOT free the provided arrays when the vector is destroyed via
   VecDestroy(). The user should not free the arrays until the vector is
   destroyed.

   Level: intermediate

.seealso: `VecCreateSeqViennaCLWithArrays()`, `VecCreateMPIWithArray()`,
          `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
          `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`,
          `VecPlaceArray()`, `VecCreateMPICUDAWithArrays()`,
          `VecViennaCLAllocateCheckHost()`
@*/
PetscErrorCode VecCreateMPIViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *vv)
{
  VecCreateMPIViennaCLWithArray(comm, bs, n, N, viennaclvec, vv);
  if (cpuarray && viennaclvec) {
    Vec_MPI *s = (Vec_MPI *)((*vv)->data);
    s->array = (PetscScalar *)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_BOTH;
  } else if (cpuarray) {
    Vec_MPI *s = (Vec_MPI *)((*vv)->data);
    s->array = (PetscScalar *)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_CPU;
  } else if (viennaclvec) {
    (*vv)->offloadmask = PETSC_OFFLOAD_GPU;
  } else {
    (*vv)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  return 0;
}
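
/*
   A usage sketch for the mirrored-arrays case handled above (illustrative names and sizes;
   the caller is responsible for keeping the host and device copies identical, which is why
   the offload mask is set to PETSC_OFFLOAD_BOTH):

     PetscInt       nlocal = 10;
     PetscScalar    host[10] = {0};   // host storage provided by the caller
     ViennaCLVector dev(nlocal);      // device storage kept consistent with host by the caller
     Vec            x;

     VecCreateMPIViennaCLWithArrays(PETSC_COMM_WORLD, 1, nlocal, PETSC_DECIDE, host, &dev, &x);
     VecDestroy(&x);                  // neither host nor dev is freed by PETSc
*/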
PetscErrorCode VecCreate_MPIViennaCL_Private(Vec vv, PetscBool alloc, PetscInt nghost, const ViennaCLVector *array)
{
  Vec_ViennaCL *vecviennacl;

  VecCreate_MPI_Private(vv, PETSC_FALSE, 0, 0);
  PetscObjectChangeTypeName((PetscObject)vv, VECMPIVIENNACL);

  /* install the ViennaCL operation table and the bindtocpu hook */
  VecBindToCPU_MPIViennaCL(vv, PETSC_FALSE);
  vv->ops->bindtocpu = VecBindToCPU_MPIViennaCL;

  if (alloc && !array) {
    VecViennaCLAllocateCheck(vv);
    VecViennaCLAllocateCheckHost(vv);
    VecSet(vv, 0.0);
    VecSet_Seq(vv, 0.0);
    vv->offloadmask = PETSC_OFFLOAD_BOTH;
  }
  if (array) {
    /* wrap the user-provided ViennaCL vector; it is not owned (or freed) by PETSc */
    if (!vv->spptr) vv->spptr = new Vec_ViennaCL;
    vecviennacl = (Vec_ViennaCL *)vv->spptr;
    vecviennacl->GPUarray_allocated = 0;
    vecviennacl->GPUarray = (ViennaCLVector *)array;
    vv->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }

  return 0;
}