Actual source code: vecviennacl.cxx
1: /*
2: Implements the sequential ViennaCL vectors.
3: */
5: #include <petscconf.h>
6: #include <petsc/private/vecimpl.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
10: #include "viennacl/linalg/inner_prod.hpp"
11: #include "viennacl/linalg/norm_1.hpp"
12: #include "viennacl/linalg/norm_2.hpp"
13: #include "viennacl/linalg/norm_inf.hpp"
15: #ifdef VIENNACL_WITH_OPENCL
16: #include "viennacl/ocl/backend.hpp"
17: #endif
19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
20: {
22: *a = 0;
23: VecViennaCLCopyToGPU(v);
24: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
25: ViennaCLWaitForGPU();
26: return 0;
27: }
29: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
30: {
32: v->offloadmask = PETSC_OFFLOAD_GPU;
34: PetscObjectStateIncrease((PetscObject)v);
35: return 0;
36: }
38: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
39: {
41: *a = 0;
42: VecViennaCLCopyToGPU(v);
43: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
44: ViennaCLWaitForGPU();
45: return 0;
46: }
48: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
49: {
51: return 0;
52: }
54: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
55: {
57: *a = 0;
58: VecViennaCLAllocateCheck(v);
59: *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
60: ViennaCLWaitForGPU();
61: return 0;
62: }
64: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
65: {
67: v->offloadmask = PETSC_OFFLOAD_GPU;
69: PetscObjectStateIncrease((PetscObject)v);
70: return 0;
71: }
73: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
74: {
75: char string[20];
76: PetscBool flg, flg_cuda, flg_opencl, flg_openmp;
78: /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
79: PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg);
80: if (flg) {
81: try {
82: PetscStrcasecmp(string, "cuda", &flg_cuda);
83: PetscStrcasecmp(string, "opencl", &flg_opencl);
84: PetscStrcasecmp(string, "openmp", &flg_openmp);
86: /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
87: if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
88: #if defined(PETSC_HAVE_CUDA)
89: else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
90: #endif
91: #if defined(PETSC_HAVE_OPENCL)
92: else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
93: #endif
94: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
95: } catch (std::exception const &ex) {
96: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
97: }
98: }
100: #if defined(PETSC_HAVE_OPENCL)
101: /* ViennaCL OpenCL device type configuration */
102: PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg);
103: if (flg) {
104: try {
105: PetscStrcasecmp(string, "cpu", &flg);
106: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
108: PetscStrcasecmp(string, "gpu", &flg);
109: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
111: PetscStrcasecmp(string, "accelerator", &flg);
112: if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
113: } catch (std::exception const &ex) {
114: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
115: }
116: }
117: #endif
119: /* Print available backends */
120: PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg);
121: if (flg) {
122: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
123: #if defined(PETSC_HAVE_CUDA)
124: PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
125: #endif
126: #if defined(PETSC_HAVE_OPENCL)
127: PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
128: #endif
129: #if defined(PETSC_HAVE_OPENMP)
130: PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
131: #else
132: PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
133: #endif
134: PetscPrintf(PETSC_COMM_WORLD, "\n");
136: /* Print selected backends */
137: PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: ");
138: #if defined(PETSC_HAVE_CUDA)
139: if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
140: #endif
141: #if defined(PETSC_HAVE_OPENCL)
142: if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
143: #endif
144: #if defined(PETSC_HAVE_OPENMP)
145: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
146: #else
147: if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
148: #endif
149: PetscPrintf(PETSC_COMM_WORLD, "\n");
150: }
151: return 0;
152: }
154: /*
155: Allocates space for the vector array on the Host if it does not exist.
156: Does NOT change the PetscViennaCLFlag for the vector
157: Does NOT zero the ViennaCL array
158: */
159: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
160: {
161: PetscScalar *array;
162: Vec_Seq *s;
163: PetscInt n = v->map->n;
165: s = (Vec_Seq *)v->data;
166: VecViennaCLAllocateCheck(v);
167: if (s->array == 0) {
168: PetscMalloc1(n, &array);
169: s->array = array;
170: s->array_allocated = array;
171: }
172: return 0;
173: }
175: /*
176: Allocates space for the vector array on the GPU if it does not exist.
177: Does NOT change the PetscViennaCLFlag for the vector
178: Does NOT zero the ViennaCL array
180: */
181: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
182: {
183: if (!v->spptr) {
184: try {
185: v->spptr = new Vec_ViennaCL;
186: ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
187: ((Vec_ViennaCL *)v->spptr)->GPUarray = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
189: } catch (std::exception const &ex) {
190: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
191: }
192: }
193: return 0;
194: }
196: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
197: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
198: {
200: VecViennaCLAllocateCheck(v);
201: if (v->map->n > 0) {
202: if (v->offloadmask == PETSC_OFFLOAD_CPU) {
203: PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0);
204: try {
205: ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
206: viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
207: ViennaCLWaitForGPU();
208: } catch (std::exception const &ex) {
209: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
210: }
211: PetscLogCpuToGpu((v->map->n) * sizeof(PetscScalar));
212: PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0);
213: v->offloadmask = PETSC_OFFLOAD_BOTH;
214: }
215: }
216: return 0;
217: }
219: /*
220: VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
221: */
222: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
223: {
225: VecViennaCLAllocateCheckHost(v);
226: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
227: PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0);
228: try {
229: ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
230: viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
231: ViennaCLWaitForGPU();
232: } catch (std::exception const &ex) {
233: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
234: }
235: PetscLogGpuToCpu((v->map->n) * sizeof(PetscScalar));
236: PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0);
237: v->offloadmask = PETSC_OFFLOAD_BOTH;
238: }
239: return 0;
240: }
242: /* Copy on CPU */
243: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
244: {
245: PetscScalar *ya;
246: const PetscScalar *xa;
248: VecViennaCLAllocateCheckHost(xin);
249: VecViennaCLAllocateCheckHost(yin);
250: if (xin != yin) {
251: VecGetArrayRead(xin, &xa);
252: VecGetArray(yin, &ya);
253: PetscArraycpy(ya, xa, xin->map->n);
254: VecRestoreArrayRead(xin, &xa);
255: VecRestoreArray(yin, &ya);
256: }
257: return 0;
258: }
260: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
261: {
262: PetscInt n = xin->map->n, i;
263: PetscScalar *xx;
265: VecGetArray(xin, &xx);
266: for (i = 0; i < n; i++) PetscRandomGetValue(r, &xx[i]);
267: VecRestoreArray(xin, &xx);
268: return 0;
269: }
271: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
272: {
273: Vec_Seq *vs = (Vec_Seq *)v->data;
275: PetscObjectSAWsViewOff(v);
276: #if defined(PETSC_USE_LOG)
277: PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n);
278: #endif
279: if (vs->array_allocated) PetscFree(vs->array_allocated);
280: PetscFree(vs);
281: return 0;
282: }
284: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
285: {
286: Vec_Seq *v = (Vec_Seq *)vin->data;
288: v->array = v->unplacedarray;
289: v->unplacedarray = 0;
290: return 0;
291: }
293: /*MC
294: VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
296: Options Database Keys:
297: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
299: Level: beginner
301: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
302: M*/
304: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
305: {
306: const ViennaCLVector *xgpu;
307: ViennaCLVector *ygpu;
309: VecViennaCLGetArrayRead(xin, &xgpu);
310: VecViennaCLGetArray(yin, &ygpu);
311: PetscLogGpuTimeBegin();
312: try {
313: if (alpha != 0.0 && xin->map->n > 0) {
314: *ygpu = *xgpu + alpha * *ygpu;
315: PetscLogGpuFlops(2.0 * yin->map->n);
316: } else {
317: *ygpu = *xgpu;
318: }
319: ViennaCLWaitForGPU();
320: } catch (std::exception const &ex) {
321: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
322: }
323: PetscLogGpuTimeEnd();
324: VecViennaCLRestoreArrayRead(xin, &xgpu);
325: VecViennaCLRestoreArray(yin, &ygpu);
326: return 0;
327: }
329: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
330: {
331: const ViennaCLVector *xgpu;
332: ViennaCLVector *ygpu;
334: if (alpha != 0.0 && xin->map->n > 0) {
335: VecViennaCLGetArrayRead(xin, &xgpu);
336: VecViennaCLGetArray(yin, &ygpu);
337: PetscLogGpuTimeBegin();
338: try {
339: *ygpu += alpha * *xgpu;
340: ViennaCLWaitForGPU();
341: } catch (std::exception const &ex) {
342: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
343: }
344: PetscLogGpuTimeEnd();
345: VecViennaCLRestoreArrayRead(xin, &xgpu);
346: VecViennaCLRestoreArray(yin, &ygpu);
347: PetscLogGpuFlops(2.0 * yin->map->n);
348: }
349: return 0;
350: }
352: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
353: {
354: const ViennaCLVector *xgpu, *ygpu;
355: ViennaCLVector *wgpu;
357: if (xin->map->n > 0) {
358: VecViennaCLGetArrayRead(xin, &xgpu);
359: VecViennaCLGetArrayRead(yin, &ygpu);
360: VecViennaCLGetArrayWrite(win, &wgpu);
361: PetscLogGpuTimeBegin();
362: try {
363: *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
364: ViennaCLWaitForGPU();
365: } catch (std::exception const &ex) {
366: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
367: }
368: PetscLogGpuTimeEnd();
369: PetscLogGpuFlops(win->map->n);
370: VecViennaCLRestoreArrayRead(xin, &xgpu);
371: VecViennaCLRestoreArrayRead(yin, &ygpu);
372: VecViennaCLRestoreArrayWrite(win, &wgpu);
373: }
374: return 0;
375: }
377: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
378: {
379: const ViennaCLVector *xgpu, *ygpu;
380: ViennaCLVector *wgpu;
382: if (alpha == 0.0 && xin->map->n > 0) {
383: VecCopy_SeqViennaCL(yin, win);
384: } else {
385: VecViennaCLGetArrayRead(xin, &xgpu);
386: VecViennaCLGetArrayRead(yin, &ygpu);
387: VecViennaCLGetArrayWrite(win, &wgpu);
388: PetscLogGpuTimeBegin();
389: if (alpha == 1.0) {
390: try {
391: *wgpu = *ygpu + *xgpu;
392: } catch (std::exception const &ex) {
393: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
394: }
395: PetscLogGpuFlops(win->map->n);
396: } else if (alpha == -1.0) {
397: try {
398: *wgpu = *ygpu - *xgpu;
399: } catch (std::exception const &ex) {
400: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
401: }
402: PetscLogGpuFlops(win->map->n);
403: } else {
404: try {
405: *wgpu = *ygpu + alpha * *xgpu;
406: } catch (std::exception const &ex) {
407: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
408: }
409: PetscLogGpuFlops(2 * win->map->n);
410: }
411: ViennaCLWaitForGPU();
412: PetscLogGpuTimeEnd();
413: VecViennaCLRestoreArrayRead(xin, &xgpu);
414: VecViennaCLRestoreArrayRead(yin, &ygpu);
415: VecViennaCLRestoreArrayWrite(win, &wgpu);
416: }
417: return 0;
418: }
420: /*
421: * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
422: *
423: * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
424: * hence there is an iterated application of these until the final result is obtained
425: */
426: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
427: {
428: PetscInt j;
430: for (j = 0; j < nv; ++j) {
431: if (j + 1 < nv) {
432: VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]);
433: ++j;
434: } else {
435: VecAXPY_SeqViennaCL(xin, alpha[j], y[j]);
436: }
437: }
438: ViennaCLWaitForGPU();
439: return 0;
440: }
442: PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
443: {
444: const ViennaCLVector *xgpu, *ygpu;
446: if (xin->map->n > 0) {
447: VecViennaCLGetArrayRead(xin, &xgpu);
448: VecViennaCLGetArrayRead(yin, &ygpu);
449: PetscLogGpuTimeBegin();
450: try {
451: *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
452: } catch (std::exception const &ex) {
453: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
454: }
455: ViennaCLWaitForGPU();
456: PetscLogGpuTimeEnd();
457: if (xin->map->n > 0) PetscLogGpuFlops(2.0 * xin->map->n - 1);
458: VecViennaCLRestoreArrayRead(xin, &xgpu);
459: VecViennaCLRestoreArrayRead(yin, &ygpu);
460: } else *z = 0.0;
461: return 0;
462: }
464: /*
465: * Operation z[j] = dot(x, y[j])
466: *
467: * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
468: */
469: PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
470: {
471: PetscInt n = xin->map->n, i;
472: const ViennaCLVector *xgpu, *ygpu;
473: Vec *yyin = (Vec *)yin;
474: std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
476: if (xin->map->n > 0) {
477: VecViennaCLGetArrayRead(xin, &xgpu);
478: for (i = 0; i < nv; i++) {
479: VecViennaCLGetArrayRead(yyin[i], &ygpu);
480: ygpu_array[i] = ygpu;
481: }
482: PetscLogGpuTimeBegin();
483: viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
484: ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
485: viennacl::copy(result.begin(), result.end(), z);
486: for (i = 0; i < nv; i++) VecViennaCLRestoreArrayRead(yyin[i], &ygpu);
487: ViennaCLWaitForGPU();
488: PetscLogGpuTimeEnd();
489: VecViennaCLRestoreArrayRead(xin, &xgpu);
490: PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0));
491: } else {
492: for (i = 0; i < nv; i++) z[i] = 0.0;
493: }
494: return 0;
495: }
497: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
498: {
499: /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
500: VecMDot_SeqViennaCL(xin, nv, yin, z);
501: ViennaCLWaitForGPU();
502: return 0;
503: }
505: PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
506: {
507: ViennaCLVector *xgpu;
509: if (xin->map->n > 0) {
510: VecViennaCLGetArrayWrite(xin, &xgpu);
511: PetscLogGpuTimeBegin();
512: try {
513: *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
514: ViennaCLWaitForGPU();
515: } catch (std::exception const &ex) {
516: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
517: }
518: PetscLogGpuTimeEnd();
519: VecViennaCLRestoreArrayWrite(xin, &xgpu);
520: }
521: return 0;
522: }
524: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
525: {
526: ViennaCLVector *xgpu;
528: if (alpha == 0.0 && xin->map->n > 0) {
529: VecSet_SeqViennaCL(xin, alpha);
530: PetscLogGpuFlops(xin->map->n);
531: } else if (alpha != 1.0 && xin->map->n > 0) {
532: VecViennaCLGetArray(xin, &xgpu);
533: PetscLogGpuTimeBegin();
534: try {
535: *xgpu *= alpha;
536: ViennaCLWaitForGPU();
537: } catch (std::exception const &ex) {
538: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
539: }
540: PetscLogGpuTimeEnd();
541: VecViennaCLRestoreArray(xin, &xgpu);
542: PetscLogGpuFlops(xin->map->n);
543: }
544: return 0;
545: }
547: PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
548: {
549: /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
550: VecDot_SeqViennaCL(xin, yin, z);
551: ViennaCLWaitForGPU();
552: return 0;
553: }
555: PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
556: {
557: const ViennaCLVector *xgpu;
558: ViennaCLVector *ygpu;
560: if (xin != yin && xin->map->n > 0) {
561: if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
562: VecViennaCLGetArrayRead(xin, &xgpu);
563: VecViennaCLGetArrayWrite(yin, &ygpu);
564: PetscLogGpuTimeBegin();
565: try {
566: *ygpu = *xgpu;
567: ViennaCLWaitForGPU();
568: } catch (std::exception const &ex) {
569: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
570: }
571: PetscLogGpuTimeEnd();
572: VecViennaCLRestoreArrayRead(xin, &xgpu);
573: VecViennaCLRestoreArrayWrite(yin, &ygpu);
575: } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
576: /* copy in CPU if we are on the CPU*/
577: VecCopy_SeqViennaCL_Private(xin, yin);
578: ViennaCLWaitForGPU();
579: } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
580: /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
581: if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
582: /* copy in CPU */
583: VecCopy_SeqViennaCL_Private(xin, yin);
584: ViennaCLWaitForGPU();
585: } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
586: /* copy in GPU */
587: VecViennaCLGetArrayRead(xin, &xgpu);
588: VecViennaCLGetArrayWrite(yin, &ygpu);
589: PetscLogGpuTimeBegin();
590: try {
591: *ygpu = *xgpu;
592: ViennaCLWaitForGPU();
593: } catch (std::exception const &ex) {
594: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
595: }
596: PetscLogGpuTimeEnd();
597: VecViennaCLRestoreArrayRead(xin, &xgpu);
598: VecViennaCLRestoreArrayWrite(yin, &ygpu);
599: } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
600: /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
601: default to copy in GPU (this is an arbitrary choice) */
602: VecViennaCLGetArrayRead(xin, &xgpu);
603: VecViennaCLGetArrayWrite(yin, &ygpu);
604: PetscLogGpuTimeBegin();
605: try {
606: *ygpu = *xgpu;
607: ViennaCLWaitForGPU();
608: } catch (std::exception const &ex) {
609: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
610: }
611: PetscLogGpuTimeEnd();
612: VecViennaCLRestoreArrayRead(xin, &xgpu);
613: VecViennaCLRestoreArrayWrite(yin, &ygpu);
614: } else {
615: VecCopy_SeqViennaCL_Private(xin, yin);
616: ViennaCLWaitForGPU();
617: }
618: }
619: }
620: return 0;
621: }
623: PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
624: {
625: ViennaCLVector *xgpu, *ygpu;
627: if (xin != yin && xin->map->n > 0) {
628: VecViennaCLGetArray(xin, &xgpu);
629: VecViennaCLGetArray(yin, &ygpu);
630: PetscLogGpuTimeBegin();
631: try {
632: viennacl::swap(*xgpu, *ygpu);
633: ViennaCLWaitForGPU();
634: } catch (std::exception const &ex) {
635: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
636: }
637: PetscLogGpuTimeEnd();
638: VecViennaCLRestoreArray(xin, &xgpu);
639: VecViennaCLRestoreArray(yin, &ygpu);
640: }
641: return 0;
642: }
644: // y = alpha * x + beta * y
645: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
646: {
647: PetscScalar a = alpha, b = beta;
648: const ViennaCLVector *xgpu;
649: ViennaCLVector *ygpu;
651: if (a == 0.0 && xin->map->n > 0) {
652: VecScale_SeqViennaCL(yin, beta);
653: } else if (b == 1.0 && xin->map->n > 0) {
654: VecAXPY_SeqViennaCL(yin, alpha, xin);
655: } else if (a == 1.0 && xin->map->n > 0) {
656: VecAYPX_SeqViennaCL(yin, beta, xin);
657: } else if (b == 0.0 && xin->map->n > 0) {
658: VecViennaCLGetArrayRead(xin, &xgpu);
659: VecViennaCLGetArray(yin, &ygpu);
660: PetscLogGpuTimeBegin();
661: try {
662: *ygpu = *xgpu * alpha;
663: ViennaCLWaitForGPU();
664: } catch (std::exception const &ex) {
665: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
666: }
667: PetscLogGpuTimeEnd();
668: PetscLogGpuFlops(xin->map->n);
669: VecViennaCLRestoreArrayRead(xin, &xgpu);
670: VecViennaCLRestoreArray(yin, &ygpu);
671: } else if (xin->map->n > 0) {
672: VecViennaCLGetArrayRead(xin, &xgpu);
673: VecViennaCLGetArray(yin, &ygpu);
674: PetscLogGpuTimeBegin();
675: try {
676: *ygpu = *xgpu * alpha + *ygpu * beta;
677: ViennaCLWaitForGPU();
678: } catch (std::exception const &ex) {
679: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
680: }
681: PetscLogGpuTimeEnd();
682: VecViennaCLRestoreArrayRead(xin, &xgpu);
683: VecViennaCLRestoreArray(yin, &ygpu);
684: PetscLogGpuFlops(3.0 * xin->map->n);
685: }
686: return 0;
687: }
689: /* operation z = alpha * x + beta *y + gamma *z*/
690: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
691: {
692: PetscInt n = zin->map->n;
693: const ViennaCLVector *xgpu, *ygpu;
694: ViennaCLVector *zgpu;
696: VecViennaCLGetArrayRead(xin, &xgpu);
697: VecViennaCLGetArrayRead(yin, &ygpu);
698: VecViennaCLGetArray(zin, &zgpu);
699: if (alpha == 0.0 && xin->map->n > 0) {
700: PetscLogGpuTimeBegin();
701: try {
702: if (beta == 0.0) {
703: *zgpu = gamma * *zgpu;
704: ViennaCLWaitForGPU();
705: PetscLogGpuFlops(1.0 * n);
706: } else if (gamma == 0.0) {
707: *zgpu = beta * *ygpu;
708: ViennaCLWaitForGPU();
709: PetscLogGpuFlops(1.0 * n);
710: } else {
711: *zgpu = beta * *ygpu + gamma * *zgpu;
712: ViennaCLWaitForGPU();
713: PetscLogGpuFlops(3.0 * n);
714: }
715: } catch (std::exception const &ex) {
716: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
717: }
718: PetscLogGpuTimeEnd();
719: PetscLogGpuFlops(3.0 * n);
720: } else if (beta == 0.0 && xin->map->n > 0) {
721: PetscLogGpuTimeBegin();
722: try {
723: if (gamma == 0.0) {
724: *zgpu = alpha * *xgpu;
725: ViennaCLWaitForGPU();
726: PetscLogGpuFlops(1.0 * n);
727: } else {
728: *zgpu = alpha * *xgpu + gamma * *zgpu;
729: ViennaCLWaitForGPU();
730: PetscLogGpuFlops(3.0 * n);
731: }
732: } catch (std::exception const &ex) {
733: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
734: }
735: PetscLogGpuTimeEnd();
736: } else if (gamma == 0.0 && xin->map->n > 0) {
737: PetscLogGpuTimeBegin();
738: try {
739: *zgpu = alpha * *xgpu + beta * *ygpu;
740: ViennaCLWaitForGPU();
741: } catch (std::exception const &ex) {
742: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
743: }
744: PetscLogGpuTimeEnd();
745: PetscLogGpuFlops(3.0 * n);
746: } else if (xin->map->n > 0) {
747: PetscLogGpuTimeBegin();
748: try {
749: /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
750: if (gamma != 1.0) *zgpu *= gamma;
751: *zgpu += alpha * *xgpu + beta * *ygpu;
752: ViennaCLWaitForGPU();
753: } catch (std::exception const &ex) {
754: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
755: }
756: PetscLogGpuTimeEnd();
757: VecViennaCLRestoreArray(zin, &zgpu);
758: VecViennaCLRestoreArrayRead(xin, &xgpu);
759: VecViennaCLRestoreArrayRead(yin, &ygpu);
760: PetscLogGpuFlops(5.0 * n);
761: }
762: return 0;
763: }
765: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
766: {
767: PetscInt n = win->map->n;
768: const ViennaCLVector *xgpu, *ygpu;
769: ViennaCLVector *wgpu;
771: if (xin->map->n > 0) {
772: VecViennaCLGetArrayRead(xin, &xgpu);
773: VecViennaCLGetArrayRead(yin, &ygpu);
774: VecViennaCLGetArray(win, &wgpu);
775: PetscLogGpuTimeBegin();
776: try {
777: *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
778: ViennaCLWaitForGPU();
779: } catch (std::exception const &ex) {
780: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
781: }
782: PetscLogGpuTimeEnd();
783: VecViennaCLRestoreArrayRead(xin, &xgpu);
784: VecViennaCLRestoreArrayRead(yin, &ygpu);
785: VecViennaCLRestoreArray(win, &wgpu);
786: PetscLogGpuFlops(n);
787: }
788: return 0;
789: }
791: PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
792: {
793: PetscInt n = xin->map->n;
794: PetscBLASInt bn;
795: const ViennaCLVector *xgpu;
797: if (xin->map->n > 0) {
798: PetscBLASIntCast(n, &bn);
799: VecViennaCLGetArrayRead(xin, &xgpu);
800: if (type == NORM_2 || type == NORM_FROBENIUS) {
801: PetscLogGpuTimeBegin();
802: try {
803: *z = viennacl::linalg::norm_2(*xgpu);
804: ViennaCLWaitForGPU();
805: } catch (std::exception const &ex) {
806: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
807: }
808: PetscLogGpuTimeEnd();
809: PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0));
810: } else if (type == NORM_INFINITY) {
811: PetscLogGpuTimeBegin();
812: try {
813: *z = viennacl::linalg::norm_inf(*xgpu);
814: ViennaCLWaitForGPU();
815: } catch (std::exception const &ex) {
816: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
817: }
818: PetscLogGpuTimeEnd();
819: } else if (type == NORM_1) {
820: PetscLogGpuTimeBegin();
821: try {
822: *z = viennacl::linalg::norm_1(*xgpu);
823: ViennaCLWaitForGPU();
824: } catch (std::exception const &ex) {
825: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
826: }
827: PetscLogGpuTimeEnd();
828: PetscLogGpuFlops(PetscMax(n - 1.0, 0.0));
829: } else if (type == NORM_1_AND_2) {
830: PetscLogGpuTimeBegin();
831: try {
832: *z = viennacl::linalg::norm_1(*xgpu);
833: *(z + 1) = viennacl::linalg::norm_2(*xgpu);
834: ViennaCLWaitForGPU();
835: } catch (std::exception const &ex) {
836: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
837: }
838: PetscLogGpuTimeEnd();
839: PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0));
840: PetscLogGpuFlops(PetscMax(n - 1.0, 0.0));
841: }
842: VecViennaCLRestoreArrayRead(xin, &xgpu);
843: } else if (type == NORM_1_AND_2) {
844: *z = 0.0;
845: *(z + 1) = 0.0;
846: } else *z = 0.0;
847: return 0;
848: }
850: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
851: {
852: VecSetRandom_SeqViennaCL_Private(xin, r);
853: xin->offloadmask = PETSC_OFFLOAD_CPU;
854: return 0;
855: }
857: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
858: {
860: VecViennaCLCopyFromGPU(vin);
861: VecResetArray_SeqViennaCL_Private(vin);
862: vin->offloadmask = PETSC_OFFLOAD_CPU;
863: return 0;
864: }
866: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
867: {
869: VecViennaCLCopyFromGPU(vin);
870: VecPlaceArray_Seq(vin, a);
871: vin->offloadmask = PETSC_OFFLOAD_CPU;
872: return 0;
873: }
875: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
876: {
878: VecViennaCLCopyFromGPU(vin);
879: VecReplaceArray_Seq(vin, a);
880: vin->offloadmask = PETSC_OFFLOAD_CPU;
881: return 0;
882: }
884: /*@C
885: VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
887: Collective
889: Input Parameters:
890: + comm - the communicator, should be PETSC_COMM_SELF
891: - n - the vector length
893: Output Parameter:
894: . V - the vector
896: Notes:
897: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
898: same type as an existing vector.
900: Level: intermediate
902: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
903: @*/
904: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
905: {
906: VecCreate(comm, v);
907: VecSetSizes(*v, n, n);
908: VecSetType(*v, VECSEQVIENNACL);
909: return 0;
910: }
912: /*@C
913: VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
914: where the user provides the array space to store the vector values.
916: Collective
918: Input Parameters:
919: + comm - the communicator, should be PETSC_COMM_SELF
920: . bs - the block size
921: . n - the vector length
922: - array - viennacl array where the vector elements are to be stored.
924: Output Parameter:
925: . V - the vector
927: Notes:
928: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
929: same type as an existing vector.
931: If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
932: at a later stage to SET the array for storing the vector values.
934: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
935: The user should not free the array until the vector is destroyed.
937: Level: intermediate
939: .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
940: `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
941: `VecCreateMPIWithArray()`
942: @*/
943: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V)
944: {
945: PetscMPIInt size;
947: VecCreate(comm, V);
948: VecSetSizes(*V, n, n);
949: VecSetBlockSize(*V, bs);
950: MPI_Comm_size(comm, &size);
952: VecCreate_SeqViennaCL_Private(*V, array);
953: return 0;
954: }
956: /*@C
957: VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
958: the user provides the array space to store the vector values.
960: Collective
962: Input Parameters:
963: + comm - the communicator, should be PETSC_COMM_SELF
964: . bs - the block size
965: . n - the vector length
966: - cpuarray - CPU memory where the vector elements are to be stored.
967: - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
969: Output Parameter:
970: . V - the vector
972: Notes:
973: If both cpuarray and viennaclvec are provided, the caller must ensure that
974: the provided arrays have identical values.
976: PETSc does NOT free the provided arrays when the vector is destroyed via
977: VecDestroy(). The user should not free the array until the vector is
978: destroyed.
980: Level: intermediate
982: .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
983: `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
984: `VecViennaCLAllocateCheckHost()`
985: @*/
986: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V)
987: {
988: PetscMPIInt size;
991: MPI_Comm_size(comm, &size);
994: // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
995: VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V);
997: if (cpuarray && viennaclvec) {
998: Vec_Seq *s = (Vec_Seq *)((*V)->data);
999: s->array = (PetscScalar *)cpuarray;
1000: (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1001: } else if (cpuarray) {
1002: Vec_Seq *s = (Vec_Seq *)((*V)->data);
1003: s->array = (PetscScalar *)cpuarray;
1004: (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1005: } else if (viennaclvec) {
1006: (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1007: } else {
1008: (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1009: }
1011: return 0;
1012: }
1014: /*@C
1015: VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1016: the one provided by the user. This is useful to avoid a copy.
1018: Not Collective
1020: Input Parameters:
1021: + vec - the vector
1022: - array - the ViennaCL vector
1024: Notes:
1025: You can return to the original viennacl vector with a call to
1026: VecViennaCLResetArray() It is not possible to use VecViennaCLPlaceArray()
1027: and VecPlaceArray() at the same time on the same vector.
1029: Level: intermediate
1031: .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1032: `VecCUDAPlaceArray()`,
1034: @*/
1035: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1036: {
1038: VecViennaCLCopyToGPU(vin);
1040: ((Vec_Seq *)vin->data)->unplacedarray = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1041: ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1042: vin->offloadmask = PETSC_OFFLOAD_GPU;
1043: PetscObjectStateIncrease((PetscObject)vin);
1044: return 0;
1045: }
1047: /*@C
1048: VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1049: after the use of VecViennaCLPlaceArray().
1051: Not Collective
1053: Input Parameters:
1054: . vec - the vector
1056: Level: developer
1058: .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1059: @*/
1060: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1061: {
1063: VecViennaCLCopyToGPU(vin);
1064: ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1065: ((Vec_Seq *)vin->data)->unplacedarray = 0;
1066: vin->offloadmask = PETSC_OFFLOAD_GPU;
1067: PetscObjectStateIncrease((PetscObject)vin);
1068: return 0;
1069: }
1071: /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1072: *
1073: * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1074: */
1075: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1076: {
1077: VecDot_SeqViennaCL(s, t, dp);
1078: VecNorm_SeqViennaCL(t, NORM_2, nm);
1079: *nm *= *nm; //squared norm required
1080: return 0;
1081: }
1083: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1084: {
1085: VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V);
1086: PetscLayoutReference(win->map, &(*V)->map);
1087: PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)(*V))->olist);
1088: PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)(*V))->qlist);
1089: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1090: return 0;
1091: }
1093: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1094: {
1095: try {
1096: if (v->spptr) {
1097: delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1098: delete (Vec_ViennaCL *)v->spptr;
1099: }
1100: } catch (char *ex) {
1101: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1102: }
1103: VecDestroy_SeqViennaCL_Private(v);
1104: return 0;
1105: }
1107: PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1108: {
1109: if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1110: VecViennaCLCopyFromGPU(v);
1111: } else {
1112: VecViennaCLAllocateCheckHost(v);
1113: }
1114: *a = *((PetscScalar **)v->data);
1115: return 0;
1116: }
1118: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1119: {
1120: v->offloadmask = PETSC_OFFLOAD_CPU;
1121: return 0;
1122: }
1124: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1125: {
1126: VecViennaCLAllocateCheckHost(v);
1127: *a = *((PetscScalar **)v->data);
1128: return 0;
1129: }
1131: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1132: {
1133: V->boundtocpu = flg;
1134: if (flg) {
1135: VecViennaCLCopyFromGPU(V);
1136: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1137: V->ops->dot = VecDot_Seq;
1138: V->ops->norm = VecNorm_Seq;
1139: V->ops->tdot = VecTDot_Seq;
1140: V->ops->scale = VecScale_Seq;
1141: V->ops->copy = VecCopy_Seq;
1142: V->ops->set = VecSet_Seq;
1143: V->ops->swap = VecSwap_Seq;
1144: V->ops->axpy = VecAXPY_Seq;
1145: V->ops->axpby = VecAXPBY_Seq;
1146: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1147: V->ops->pointwisemult = VecPointwiseMult_Seq;
1148: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1149: V->ops->setrandom = VecSetRandom_Seq;
1150: V->ops->dot_local = VecDot_Seq;
1151: V->ops->tdot_local = VecTDot_Seq;
1152: V->ops->norm_local = VecNorm_Seq;
1153: V->ops->mdot_local = VecMDot_Seq;
1154: V->ops->mtdot_local = VecMTDot_Seq;
1155: V->ops->maxpy = VecMAXPY_Seq;
1156: V->ops->mdot = VecMDot_Seq;
1157: V->ops->mtdot = VecMTDot_Seq;
1158: V->ops->aypx = VecAYPX_Seq;
1159: V->ops->waxpy = VecWAXPY_Seq;
1160: V->ops->dotnorm2 = NULL;
1161: V->ops->placearray = VecPlaceArray_Seq;
1162: V->ops->replacearray = VecReplaceArray_Seq;
1163: V->ops->resetarray = VecResetArray_Seq;
1164: V->ops->duplicate = VecDuplicate_Seq;
1165: } else {
1166: V->ops->dot = VecDot_SeqViennaCL;
1167: V->ops->norm = VecNorm_SeqViennaCL;
1168: V->ops->tdot = VecTDot_SeqViennaCL;
1169: V->ops->scale = VecScale_SeqViennaCL;
1170: V->ops->copy = VecCopy_SeqViennaCL;
1171: V->ops->set = VecSet_SeqViennaCL;
1172: V->ops->swap = VecSwap_SeqViennaCL;
1173: V->ops->axpy = VecAXPY_SeqViennaCL;
1174: V->ops->axpby = VecAXPBY_SeqViennaCL;
1175: V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1176: V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1177: V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1178: V->ops->setrandom = VecSetRandom_SeqViennaCL;
1179: V->ops->dot_local = VecDot_SeqViennaCL;
1180: V->ops->tdot_local = VecTDot_SeqViennaCL;
1181: V->ops->norm_local = VecNorm_SeqViennaCL;
1182: V->ops->mdot_local = VecMDot_SeqViennaCL;
1183: V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1184: V->ops->maxpy = VecMAXPY_SeqViennaCL;
1185: V->ops->mdot = VecMDot_SeqViennaCL;
1186: V->ops->mtdot = VecMTDot_SeqViennaCL;
1187: V->ops->aypx = VecAYPX_SeqViennaCL;
1188: V->ops->waxpy = VecWAXPY_SeqViennaCL;
1189: V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1190: V->ops->placearray = VecPlaceArray_SeqViennaCL;
1191: V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1192: V->ops->resetarray = VecResetArray_SeqViennaCL;
1193: V->ops->destroy = VecDestroy_SeqViennaCL;
1194: V->ops->duplicate = VecDuplicate_SeqViennaCL;
1195: V->ops->getarraywrite = VecGetArrayWrite_SeqViennaCL;
1196: V->ops->getarray = VecGetArray_SeqViennaCL;
1197: V->ops->restorearray = VecRestoreArray_SeqViennaCL;
1198: }
1199: return 0;
1200: }
1202: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1203: {
1204: PetscMPIInt size;
1206: MPI_Comm_size(PetscObjectComm((PetscObject)V), &size);
1208: VecCreate_Seq_Private(V, 0);
1209: PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL);
1211: VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE);
1212: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1214: VecViennaCLAllocateCheck(V);
1215: VecSet_SeqViennaCL(V, 0.0);
1216: return 0;
1217: }
1219: /*@C
1220: VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1222: Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1223: invoking clReleaseContext().
1225: Input Parameters:
1226: . v - the vector
1228: Output Parameters:
1229: . ctx - pointer to the underlying CL context
1231: Level: intermediate
1233: .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1234: @*/
1235: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1236: {
1237: #if !defined(PETSC_HAVE_OPENCL)
1238: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1239: #else
1242: const ViennaCLVector *v_vcl;
1243: VecViennaCLGetArrayRead(v, &v_vcl);
1244: try {
1245: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1246: const cl_context ocl_ctx = vcl_ctx.handle().get();
1247: clRetainContext(ocl_ctx);
1248: *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1249: } catch (std::exception const &ex) {
1250: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1251: }
1253: return 0;
1254: #endif
1255: }
1257: /*@C
1258: VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1259: operations of the Vec are enqueued.
1261: Caller should cast (*queue) to (const cl_command_queue). Caller is
1262: responsible for invoking clReleaseCommandQueue().
1264: Input Parameters:
1265: . v - the vector
1267: Output Parameters:
1268: . ctx - pointer to the CL command queue
1270: Level: intermediate
1272: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1273: @*/
1274: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1275: {
1276: #if !defined(PETSC_HAVE_OPENCL)
1277: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1278: #else
1280: const ViennaCLVector *v_vcl;
1281: VecViennaCLGetArrayRead(v, &v_vcl);
1282: try {
1283: viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1284: const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1285: const cl_command_queue ocl_queue = vcl_queue.handle().get();
1286: clRetainCommandQueue(ocl_queue);
1287: *queue = (PETSC_UINTPTR_T)(ocl_queue);
1288: } catch (std::exception const &ex) {
1289: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1290: }
1292: return 0;
1293: #endif
1294: }
1296: /*@C
1297: VecViennaCLGetCLMemRead - Provides access to the the CL buffer inside a Vec.
1299: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1300: invoking clReleaseMemObject().
1302: Input Parameters:
1303: . v - the vector
1305: Output Parameters:
1306: . mem - pointer to the device buffer
1308: Level: intermediate
1310: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1311: @*/
1312: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1313: {
1314: #if !defined(PETSC_HAVE_OPENCL)
1315: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1316: #else
1318: const ViennaCLVector *v_vcl;
1319: VecViennaCLGetArrayRead(v, &v_vcl);
1320: try {
1321: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1322: clRetainMemObject(ocl_mem);
1323: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1324: } catch (std::exception const &ex) {
1325: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1326: }
1327: return 0;
1328: #endif
1329: }
1331: /*@C
1332: VecViennaCLGetCLMemWrite - Provides access to the the CL buffer inside a Vec.
1334: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1335: invoking clReleaseMemObject().
1337: The device pointer has to be released by calling
1338: VecViennaCLRestoreCLMemWrite(). Upon restoring the vector data the data on
1339: the host will be marked as out of date. A subsequent access of the host data
1340: will thus incur a data transfer from the device to the host.
1342: Input Parameters:
1343: . v - the vector
1345: Output Parameters:
1346: . mem - pointer to the device buffer
1348: Level: intermediate
1350: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1351: @*/
1352: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1353: {
1354: #if !defined(PETSC_HAVE_OPENCL)
1355: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1356: #else
1358: ViennaCLVector *v_vcl;
1359: VecViennaCLGetArrayWrite(v, &v_vcl);
1360: try {
1361: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1362: clRetainMemObject(ocl_mem);
1363: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1364: } catch (std::exception const &ex) {
1365: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1366: }
1368: return 0;
1369: #endif
1370: }
1372: /*@C
1373: VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1374: acquired with VecViennaCLGetCLMemWrite().
1376: This marks the host data as out of date. Subsequent access to the
1377: vector data on the host side with for instance VecGetArray() incurs a
1378: data transfer.
1380: Input Parameters:
1381: . v - the vector
1383: Level: intermediate
1385: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1386: @*/
1387: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1388: {
1389: #if !defined(PETSC_HAVE_OPENCL)
1390: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1391: #else
1393: VecViennaCLRestoreArrayWrite(v, PETSC_NULL);
1395: return 0;
1396: #endif
1397: }
1399: /*@C
1400: VecViennaCLGetCLMem - Provides access to the the CL buffer inside a Vec.
1402: Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1403: invoking clReleaseMemObject().
1405: The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1406: Upon restoring the vector data the data on the host will be marked as out of
1407: date. A subsequent access of the host data will thus incur a data transfer
1408: from the device to the host.
1410: Input Parameters:
1411: . v - the vector
1413: Output Parameters:
1414: . mem - pointer to the device buffer
1416: Level: intermediate
1418: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1419: @*/
1420: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1421: {
1422: #if !defined(PETSC_HAVE_OPENCL)
1423: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1424: #else
1426: ViennaCLVector *v_vcl;
1427: VecViennaCLGetArray(v, &v_vcl);
1428: try {
1429: const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1430: clRetainMemObject(ocl_mem);
1431: *mem = (PETSC_UINTPTR_T)(ocl_mem);
1432: } catch (std::exception const &ex) {
1433: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1434: }
1436: return 0;
1437: #endif
1438: }
1440: /*@C
1441: VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1442: acquired with VecViennaCLGetCLMem().
1444: This marks the host data as out of date. Subsequent access to the vector
1445: data on the host side with for instance VecGetArray() incurs a data
1446: transfer.
1448: Input Parameters:
1449: . v - the vector
1451: Level: intermediate
1453: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1454: @*/
1455: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1456: {
1457: #if !defined(PETSC_HAVE_OPENCL)
1458: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1459: #else
1461: VecViennaCLRestoreArray(v, PETSC_NULL);
1463: return 0;
1464: #endif
1465: }
1467: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1468: {
1469: Vec_ViennaCL *vecviennacl;
1470: PetscMPIInt size;
1472: MPI_Comm_size(PetscObjectComm((PetscObject)V), &size);
1474: VecCreate_Seq_Private(V, 0);
1475: PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL);
1476: VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE);
1477: V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1479: if (array) {
1480: if (!V->spptr) V->spptr = new Vec_ViennaCL;
1481: vecviennacl = (Vec_ViennaCL *)V->spptr;
1482: vecviennacl->GPUarray_allocated = 0;
1483: vecviennacl->GPUarray = (ViennaCLVector *)array;
1484: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1485: }
1487: return 0;
1488: }