Actual source code: vechip.hip.cpp
/*
   Implementation of the sequential HIP vectors.

   This file contains the code that can be compiled with a C
   compiler. The companion file vechip2.hip.cpp contains the code that
   must be compiled with the hipcc compiler.
*/
9: #define PETSC_SKIP_SPINLOCK
11: #include <petscconf.h>
12: #include <petsc/private/vecimpl.h>
13: #include <../src/vec/vec/impls/dvecimpl.h>
14: #include <petsc/private/hipvecimpl.h>
16: PetscErrorCode VecHIPGetArrays_Private(Vec v, const PetscScalar **x, const PetscScalar **x_d, PetscOffloadMask *flg)
17: {
19: if (x) {
20: Vec_Seq *h = (Vec_Seq *)v->data;
22: *x = h->array;
23: }
24: if (x_d) {
25: Vec_HIP *d = (Vec_HIP *)v->spptr;
27: *x_d = d ? d->GPUarray : NULL;
28: }
29: if (flg) *flg = v->offloadmask;
30: return 0;
31: }
33: /*
34: Allocates space for the vector array on the Host if it does not exist.
35: Does NOT change the PetscHIPFlag for the vector
36: Does NOT zero the HIP array
37: */
38: PetscErrorCode VecHIPAllocateCheckHost(Vec v)
39: {
40: PetscScalar *array;
41: Vec_Seq *s = (Vec_Seq *)v->data;
42: PetscInt n = v->map->n;
44: if (!s) {
45: PetscNew(&s);
46: v->data = s;
47: }
48: if (!s->array) {
49: if (n * sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
50: PetscMallocSetHIPHost();
51: v->pinned_memory = PETSC_TRUE;
52: }
53: PetscMalloc1(n, &array);
54: s->array = array;
55: s->array_allocated = array;
56: if (n * sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) PetscMallocResetHIPHost();
57: if (v->offloadmask == PETSC_OFFLOAD_UNALLOCATED) v->offloadmask = PETSC_OFFLOAD_CPU;
58: }
59: return 0;
60: }
62: PetscErrorCode VecCopy_SeqHIP_Private(Vec xin, Vec yin)
63: {
64: PetscScalar *ya;
65: const PetscScalar *xa;
67: VecHIPAllocateCheckHost(xin);
68: VecHIPAllocateCheckHost(yin);
69: if (xin != yin) {
70: VecGetArrayRead(xin, &xa);
71: VecGetArray(yin, &ya);
72: PetscArraycpy(ya, xa, xin->map->n);
73: VecRestoreArrayRead(xin, &xa);
74: VecRestoreArray(yin, &ya);
75: }
76: return 0;
77: }
79: PetscErrorCode VecSetRandom_SeqHIP(Vec xin, PetscRandom r)
80: {
81: PetscInt n = xin->map->n;
82: PetscScalar *xx;
84: VecGetArrayWrite(xin, &xx);
85: PetscRandomGetValues(r, n, xx);
86: VecRestoreArrayWrite(xin, &xx);
87: return 0;
88: }
90: PetscErrorCode VecDestroy_SeqHIP_Private(Vec v)
91: {
92: Vec_Seq *vs = (Vec_Seq *)v->data;
94: PetscObjectSAWsViewOff(v);
95: #if defined(PETSC_USE_LOG)
96: PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n);
97: #endif
98: if (vs) {
99: if (vs->array_allocated) {
100: if (v->pinned_memory) PetscMallocSetHIPHost();
101: PetscFree(vs->array_allocated);
102: if (v->pinned_memory) {
103: PetscMallocResetHIPHost();
104: v->pinned_memory = PETSC_FALSE;
105: }
106: }
107: PetscFree(vs);
108: }
109: return 0;
110: }
112: PetscErrorCode VecResetArray_SeqHIP_Private(Vec vin)
113: {
114: Vec_Seq *v = (Vec_Seq *)vin->data;
116: v->array = v->unplacedarray;
117: v->unplacedarray = 0;
118: return 0;
119: }
121: PetscErrorCode VecResetArray_SeqHIP(Vec vin)
122: {
123: VecHIPCopyFromGPU(vin);
124: VecResetArray_SeqHIP_Private(vin);
125: vin->offloadmask = PETSC_OFFLOAD_CPU;
126: return 0;
127: }
129: PetscErrorCode VecPlaceArray_SeqHIP(Vec vin, const PetscScalar *a)
130: {
131: VecHIPCopyFromGPU(vin);
132: VecPlaceArray_Seq(vin, a);
133: vin->offloadmask = PETSC_OFFLOAD_CPU;
134: return 0;
135: }
137: PetscErrorCode VecReplaceArray_SeqHIP(Vec vin, const PetscScalar *a)
138: {
139: Vec_Seq *vs = (Vec_Seq *)vin->data;
141: if (vs->array != vs->array_allocated) {
142: /* make sure the users array has the latest values */
143: VecHIPCopyFromGPU(vin);
144: }
145: if (vs->array_allocated) {
146: if (vin->pinned_memory) PetscMallocSetHIPHost();
147: PetscFree(vs->array_allocated);
148: if (vin->pinned_memory) PetscMallocResetHIPHost();
149: }
150: vin->pinned_memory = PETSC_FALSE;
151: vs->array_allocated = vs->array = (PetscScalar *)a;
152: vin->offloadmask = PETSC_OFFLOAD_CPU;
153: return 0;
154: }
156: /*@
157: VecCreateSeqHIP - Creates a standard, sequential array-style vector.
159: Collective
161: Input Parameter:
162: + comm - the communicator, should be PETSC_COMM_SELF
163: - n - the vector length
165: Output Parameter:
166: . v - the vector
168: Notes:
169: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
170: same type as an existing vector.
172: Level: intermediate
174: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
175: @*/
176: PetscErrorCode VecCreateSeqHIP(MPI_Comm comm, PetscInt n, Vec *v)
177: {
178: VecCreate(comm, v);
179: VecSetSizes(*v, n, n);
180: VecSetType(*v, VECSEQHIP);
181: return 0;
182: }
184: PetscErrorCode VecDuplicate_SeqHIP(Vec win, Vec *V)
185: {
186: VecCreateSeqHIP(PetscObjectComm((PetscObject)win), win->map->n, V);
187: PetscLayoutReference(win->map, &(*V)->map);
188: PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)(*V))->olist);
189: PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)(*V))->qlist);
190: (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
191: return 0;
192: }
194: PetscErrorCode VecCreate_SeqHIP(Vec V)
195: {
196: PetscDeviceInitialize(PETSC_DEVICE_HIP);
197: PetscLayoutSetUp(V->map);
198: VecHIPAllocateCheck(V);
199: VecCreate_SeqHIP_Private(V, ((Vec_HIP *)V->spptr)->GPUarray_allocated);
200: VecSet_SeqHIP(V, 0.0);
201: return 0;
202: }
204: /*@C
205: VecCreateSeqHIPWithArray - Creates a HIP sequential array-style vector,
206: where the user provides the array space to store the vector values. The array
207: provided must be a GPU array.
209: Collective
211: Input Parameters:
212: + comm - the communicator, should be PETSC_COMM_SELF
213: . bs - the block size
214: . n - the vector length
215: - array - GPU memory where the vector elements are to be stored.
217: Output Parameter:
218: . V - the vector
220: Notes:
221: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
222: same type as an existing vector.
224: If the user-provided array is NULL, then VecHIPPlaceArray() can be used
225: at a later stage to SET the array for storing the vector values.
227: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
228: The user should not free the array until the vector is destroyed.
230: Level: intermediate
232: .seealso: `VecCreateMPIHIPWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
233: `VecCreateGhost()`, `VecCreateSeq()`, `VecHIPPlaceArray()`, `VecCreateSeqWithArray()`,
234: `VecCreateMPIWithArray()`
235: @*/
236: PetscErrorCode VecCreateSeqHIPWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar array[], Vec *V)
237: {
238: PetscDeviceInitialize(PETSC_DEVICE_HIP);
239: VecCreate(comm, V);
240: VecSetSizes(*V, n, n);
241: VecSetBlockSize(*V, bs);
242: VecCreate_SeqHIP_Private(*V, array);
243: return 0;
244: }
246: /*@C
247: VecCreateSeqHIPWithArrays - Creates a HIP sequential array-style vector,
248: where the user provides the array space to store the vector values.
250: Collective
252: Input Parameters:
253: + comm - the communicator, should be PETSC_COMM_SELF
254: . bs - the block size
255: . n - the vector length
256: - cpuarray - CPU memory where the vector elements are to be stored.
257: - gpuarray - GPU memory where the vector elements are to be stored.
259: Output Parameter:
260: . V - the vector
262: Notes:
263: If both cpuarray and gpuarray are provided, the caller must ensure that
264: the provided arrays have identical values.
266: PETSc does NOT free the provided arrays when the vector is destroyed via
267: VecDestroy(). The user should not free the array until the vector is
268: destroyed.
270: Level: intermediate
272: .seealso: `VecCreateMPIHIPWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
273: `VecHIPPlaceArray()`, `VecCreateSeqHIPWithArray()`,
274: `VecHIPAllocateCheckHost()`
275: @*/
276: PetscErrorCode VecCreateSeqHIPWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *V)
277: {
278: // set V's gpuarray to be gpuarray, do not allocate memory on host yet.
279: VecCreateSeqHIPWithArray(comm, bs, n, gpuarray, V);
281: if (cpuarray && gpuarray) {
282: Vec_Seq *s = (Vec_Seq *)((*V)->data);
283: s->array = (PetscScalar *)cpuarray;
284: (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
285: } else if (cpuarray) {
286: Vec_Seq *s = (Vec_Seq *)((*V)->data);
287: s->array = (PetscScalar *)cpuarray;
288: (*V)->offloadmask = PETSC_OFFLOAD_CPU;
289: } else if (gpuarray) {
290: (*V)->offloadmask = PETSC_OFFLOAD_GPU;
291: } else {
292: (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
293: }
295: return 0;
296: }
298: PetscErrorCode VecGetArray_SeqHIP(Vec v, PetscScalar **a)
299: {
300: VecHIPCopyFromGPU(v);
301: *a = *((PetscScalar **)v->data);
302: return 0;
303: }
305: PetscErrorCode VecRestoreArray_SeqHIP(Vec v, PetscScalar **a)
306: {
307: v->offloadmask = PETSC_OFFLOAD_CPU;
308: return 0;
309: }
311: PetscErrorCode VecGetArrayWrite_SeqHIP(Vec v, PetscScalar **a)
312: {
313: VecHIPAllocateCheckHost(v);
314: *a = *((PetscScalar **)v->data);
315: return 0;
316: }
318: PetscErrorCode VecGetArrayAndMemType_SeqHIP(Vec v, PetscScalar **a, PetscMemType *mtype)
319: {
320: VecHIPCopyToGPU(v);
321: *a = ((Vec_HIP *)v->spptr)->GPUarray;
322: if (mtype) *mtype = PETSC_MEMTYPE_HIP;
323: return 0;
324: }
326: PetscErrorCode VecRestoreArrayAndMemType_SeqHIP(Vec v, PetscScalar **a)
327: {
328: v->offloadmask = PETSC_OFFLOAD_GPU;
329: return 0;
330: }
332: PetscErrorCode VecGetArrayWriteAndMemType_SeqHIP(Vec v, PetscScalar **a, PetscMemType *mtype)
333: {
334: /* Allocate memory (not zeroed) on device if not yet, but no need to sync data from host to device */
335: VecHIPAllocateCheck(v);
336: *a = ((Vec_HIP *)v->spptr)->GPUarray;
337: if (mtype) *mtype = PETSC_MEMTYPE_HIP;
338: return 0;
339: }
341: PetscErrorCode VecBindToCPU_SeqHIP(Vec V, PetscBool bind)
342: {
343: V->boundtocpu = bind;
344: if (bind) {
345: VecHIPCopyFromGPU(V);
346: V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
347: V->ops->dot = VecDot_Seq;
348: V->ops->norm = VecNorm_Seq;
349: V->ops->tdot = VecTDot_Seq;
350: V->ops->scale = VecScale_Seq;
351: V->ops->copy = VecCopy_Seq;
352: V->ops->set = VecSet_Seq;
353: V->ops->swap = VecSwap_Seq;
354: V->ops->axpy = VecAXPY_Seq;
355: V->ops->axpby = VecAXPBY_Seq;
356: V->ops->axpbypcz = VecAXPBYPCZ_Seq;
357: V->ops->pointwisemult = VecPointwiseMult_Seq;
358: V->ops->pointwisedivide = VecPointwiseDivide_Seq;
359: V->ops->setrandom = VecSetRandom_Seq;
360: V->ops->dot_local = VecDot_Seq;
361: V->ops->tdot_local = VecTDot_Seq;
362: V->ops->norm_local = VecNorm_Seq;
363: V->ops->mdot_local = VecMDot_Seq;
364: V->ops->mtdot_local = VecMTDot_Seq;
365: V->ops->maxpy = VecMAXPY_Seq;
366: V->ops->mdot = VecMDot_Seq;
367: V->ops->mtdot = VecMTDot_Seq;
368: V->ops->aypx = VecAYPX_Seq;
369: V->ops->waxpy = VecWAXPY_Seq;
370: V->ops->dotnorm2 = NULL;
371: V->ops->placearray = VecPlaceArray_Seq;
372: V->ops->replacearray = VecReplaceArray_SeqHIP;
373: V->ops->resetarray = VecResetArray_Seq;
374: V->ops->duplicate = VecDuplicate_Seq;
375: V->ops->conjugate = VecConjugate_Seq;
376: V->ops->getlocalvector = NULL;
377: V->ops->restorelocalvector = NULL;
378: V->ops->getlocalvectorread = NULL;
379: V->ops->restorelocalvectorread = NULL;
380: V->ops->getarraywrite = NULL;
381: V->ops->getarrayandmemtype = NULL;
382: V->ops->restorearrayandmemtype = NULL;
383: V->ops->getarraywriteandmemtype = NULL;
384: V->ops->max = VecMax_Seq;
385: V->ops->min = VecMin_Seq;
386: V->ops->reciprocal = VecReciprocal_Default;
387: V->ops->sum = NULL;
388: V->ops->shift = NULL;
389: } else {
390: V->ops->dot = VecDot_SeqHIP;
391: V->ops->norm = VecNorm_SeqHIP;
392: V->ops->tdot = VecTDot_SeqHIP;
393: V->ops->scale = VecScale_SeqHIP;
394: V->ops->copy = VecCopy_SeqHIP;
395: V->ops->set = VecSet_SeqHIP;
396: V->ops->swap = VecSwap_SeqHIP;
397: V->ops->axpy = VecAXPY_SeqHIP;
398: V->ops->axpby = VecAXPBY_SeqHIP;
399: V->ops->axpbypcz = VecAXPBYPCZ_SeqHIP;
400: V->ops->pointwisemult = VecPointwiseMult_SeqHIP;
401: V->ops->pointwisedivide = VecPointwiseDivide_SeqHIP;
402: V->ops->setrandom = VecSetRandom_SeqHIP;
403: V->ops->dot_local = VecDot_SeqHIP;
404: V->ops->tdot_local = VecTDot_SeqHIP;
405: V->ops->norm_local = VecNorm_SeqHIP;
406: V->ops->mdot_local = VecMDot_SeqHIP;
407: V->ops->maxpy = VecMAXPY_SeqHIP;
408: V->ops->mdot = VecMDot_SeqHIP;
409: V->ops->aypx = VecAYPX_SeqHIP;
410: V->ops->waxpy = VecWAXPY_SeqHIP;
411: V->ops->dotnorm2 = VecDotNorm2_SeqHIP;
412: V->ops->placearray = VecPlaceArray_SeqHIP;
413: V->ops->replacearray = VecReplaceArray_SeqHIP;
414: V->ops->resetarray = VecResetArray_SeqHIP;
415: V->ops->destroy = VecDestroy_SeqHIP;
416: V->ops->duplicate = VecDuplicate_SeqHIP;
417: V->ops->conjugate = VecConjugate_SeqHIP;
418: V->ops->getlocalvector = VecGetLocalVector_SeqHIP;
419: V->ops->restorelocalvector = VecRestoreLocalVector_SeqHIP;
420: V->ops->getlocalvectorread = VecGetLocalVectorRead_SeqHIP;
421: V->ops->restorelocalvectorread = VecRestoreLocalVectorRead_SeqHIP;
422: V->ops->getarraywrite = VecGetArrayWrite_SeqHIP;
423: V->ops->getarray = VecGetArray_SeqHIP;
424: V->ops->restorearray = VecRestoreArray_SeqHIP;
425: V->ops->getarrayandmemtype = VecGetArrayAndMemType_SeqHIP;
426: V->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqHIP;
427: V->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqHIP;
428: V->ops->max = VecMax_SeqHIP;
429: V->ops->min = VecMin_SeqHIP;
430: V->ops->reciprocal = VecReciprocal_SeqHIP;
431: V->ops->sum = VecSum_SeqHIP;
432: V->ops->shift = VecShift_SeqHIP;
433: }
434: return 0;
435: }
437: PetscErrorCode VecCreate_SeqHIP_Private(Vec V, const PetscScalar *array)
438: {
439: Vec_HIP *vechip;
440: PetscMPIInt size;
441: PetscBool option_set;
443: MPI_Comm_size(PetscObjectComm((PetscObject)V), &size);
445: VecCreate_Seq_Private(V, 0);
446: PetscObjectChangeTypeName((PetscObject)V, VECSEQHIP);
447: VecBindToCPU_SeqHIP(V, PETSC_FALSE);
448: V->ops->bindtocpu = VecBindToCPU_SeqHIP;
450: /* Later, functions check for the Vec_HIP structure existence, so do not create it without array */
451: if (array) {
452: if (!V->spptr) {
453: PetscReal pinned_memory_min;
455: PetscCalloc(sizeof(Vec_HIP), &V->spptr);
456: vechip = (Vec_HIP *)V->spptr;
457: V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
459: pinned_memory_min = 0;
460: /* Need to parse command line for minimum size to use for pinned memory allocations on host here.
461: Note: This same code duplicated in VecHIPAllocateCheck() and VecCreate_MPIHIP_Private(). Is there a good way to avoid this? */
462: PetscOptionsBegin(PetscObjectComm((PetscObject)V), ((PetscObject)V)->prefix, "VECHIP Options", "Vec");
463: PetscOptionsReal("-vec_pinned_memory_min", "Minimum size (in bytes) for an allocation to use pinned memory on host", "VecSetPinnedMemoryMin", pinned_memory_min, &pinned_memory_min, &option_set);
464: if (option_set) V->minimum_bytes_pinned_memory = pinned_memory_min;
465: PetscOptionsEnd();
466: }
467: vechip = (Vec_HIP *)V->spptr;
468: vechip->GPUarray = (PetscScalar *)array;
469: V->offloadmask = PETSC_OFFLOAD_GPU;
470: }
471: return 0;
472: }