Actual source code: vechip.hip.cpp

  1: /*
  2:  Implementation of the sequential HIP vectors.

  4:  This file contains the code that can be compiled with a C
  5:  compiler.  The companion file vechip2.hip.cpp contains the code that
  6:  must be compiled with hipcc compiler.
  7:  */

  9: #define PETSC_SKIP_SPINLOCK

 11: #include <petscconf.h>
 12: #include <petsc/private/vecimpl.h>
 13: #include <../src/vec/vec/impls/dvecimpl.h>
 14: #include <petsc/private/hipvecimpl.h>

 16: PetscErrorCode VecHIPGetArrays_Private(Vec v, const PetscScalar **x, const PetscScalar **x_d, PetscOffloadMask *flg)
 17: {
 19:   if (x) {
 20:     Vec_Seq *h = (Vec_Seq *)v->data;

 22:     *x = h->array;
 23:   }
 24:   if (x_d) {
 25:     Vec_HIP *d = (Vec_HIP *)v->spptr;

 27:     *x_d = d ? d->GPUarray : NULL;
 28:   }
 29:   if (flg) *flg = v->offloadmask;
 30:   return 0;
 31: }

 33: /*
 34:     Allocates space for the vector array on the Host if it does not exist.
 35:     Does NOT change the PetscHIPFlag for the vector
 36:     Does NOT zero the HIP array
 37:  */
 38: PetscErrorCode VecHIPAllocateCheckHost(Vec v)
 39: {
 40:   PetscScalar *array;
 41:   Vec_Seq     *s = (Vec_Seq *)v->data;
 42:   PetscInt     n = v->map->n;

 44:   if (!s) {
 45:     PetscNew(&s);
 46:     v->data = s;
 47:   }
 48:   if (!s->array) {
 49:     if (n * sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
 50:       PetscMallocSetHIPHost();
 51:       v->pinned_memory = PETSC_TRUE;
 52:     }
 53:     PetscMalloc1(n, &array);
 54:     s->array           = array;
 55:     s->array_allocated = array;
 56:     if (n * sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) PetscMallocResetHIPHost();
 57:     if (v->offloadmask == PETSC_OFFLOAD_UNALLOCATED) v->offloadmask = PETSC_OFFLOAD_CPU;
 58:   }
 59:   return 0;
 60: }

 62: PetscErrorCode VecCopy_SeqHIP_Private(Vec xin, Vec yin)
 63: {
 64:   PetscScalar       *ya;
 65:   const PetscScalar *xa;

 67:   VecHIPAllocateCheckHost(xin);
 68:   VecHIPAllocateCheckHost(yin);
 69:   if (xin != yin) {
 70:     VecGetArrayRead(xin, &xa);
 71:     VecGetArray(yin, &ya);
 72:     PetscArraycpy(ya, xa, xin->map->n);
 73:     VecRestoreArrayRead(xin, &xa);
 74:     VecRestoreArray(yin, &ya);
 75:   }
 76:   return 0;
 77: }

 79: PetscErrorCode VecSetRandom_SeqHIP(Vec xin, PetscRandom r)
 80: {
 81:   PetscInt     n = xin->map->n;
 82:   PetscScalar *xx;

 84:   VecGetArrayWrite(xin, &xx);
 85:   PetscRandomGetValues(r, n, xx);
 86:   VecRestoreArrayWrite(xin, &xx);
 87:   return 0;
 88: }

 90: PetscErrorCode VecDestroy_SeqHIP_Private(Vec v)
 91: {
 92:   Vec_Seq *vs = (Vec_Seq *)v->data;

 94:   PetscObjectSAWsViewOff(v);
 95: #if defined(PETSC_USE_LOG)
 96:   PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n);
 97: #endif
 98:   if (vs) {
 99:     if (vs->array_allocated) {
100:       if (v->pinned_memory) PetscMallocSetHIPHost();
101:       PetscFree(vs->array_allocated);
102:       if (v->pinned_memory) {
103:         PetscMallocResetHIPHost();
104:         v->pinned_memory = PETSC_FALSE;
105:       }
106:     }
107:     PetscFree(vs);
108:   }
109:   return 0;
110: }

112: PetscErrorCode VecResetArray_SeqHIP_Private(Vec vin)
113: {
114:   Vec_Seq *v = (Vec_Seq *)vin->data;

116:   v->array         = v->unplacedarray;
117:   v->unplacedarray = 0;
118:   return 0;
119: }

121: PetscErrorCode VecResetArray_SeqHIP(Vec vin)
122: {
123:   VecHIPCopyFromGPU(vin);
124:   VecResetArray_SeqHIP_Private(vin);
125:   vin->offloadmask = PETSC_OFFLOAD_CPU;
126:   return 0;
127: }

129: PetscErrorCode VecPlaceArray_SeqHIP(Vec vin, const PetscScalar *a)
130: {
131:   VecHIPCopyFromGPU(vin);
132:   VecPlaceArray_Seq(vin, a);
133:   vin->offloadmask = PETSC_OFFLOAD_CPU;
134:   return 0;
135: }

137: PetscErrorCode VecReplaceArray_SeqHIP(Vec vin, const PetscScalar *a)
138: {
139:   Vec_Seq *vs = (Vec_Seq *)vin->data;

141:   if (vs->array != vs->array_allocated) {
142:     /* make sure the users array has the latest values */
143:     VecHIPCopyFromGPU(vin);
144:   }
145:   if (vs->array_allocated) {
146:     if (vin->pinned_memory) PetscMallocSetHIPHost();
147:     PetscFree(vs->array_allocated);
148:     if (vin->pinned_memory) PetscMallocResetHIPHost();
149:   }
150:   vin->pinned_memory  = PETSC_FALSE;
151:   vs->array_allocated = vs->array = (PetscScalar *)a;
152:   vin->offloadmask                = PETSC_OFFLOAD_CPU;
153:   return 0;
154: }

156: /*@
157:  VecCreateSeqHIP - Creates a standard, sequential array-style vector.

159:  Collective

161:  Input Parameter:
162:  +  comm - the communicator, should be PETSC_COMM_SELF
163:  -  n - the vector length

165:  Output Parameter:
166:  .  v - the vector

168:  Notes:
169:  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
170:  same type as an existing vector.

172:  Level: intermediate

174:  .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
175:  @*/
176: PetscErrorCode VecCreateSeqHIP(MPI_Comm comm, PetscInt n, Vec *v)
177: {
178:   VecCreate(comm, v);
179:   VecSetSizes(*v, n, n);
180:   VecSetType(*v, VECSEQHIP);
181:   return 0;
182: }

184: PetscErrorCode VecDuplicate_SeqHIP(Vec win, Vec *V)
185: {
186:   VecCreateSeqHIP(PetscObjectComm((PetscObject)win), win->map->n, V);
187:   PetscLayoutReference(win->map, &(*V)->map);
188:   PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)(*V))->olist);
189:   PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)(*V))->qlist);
190:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
191:   return 0;
192: }

194: PetscErrorCode VecCreate_SeqHIP(Vec V)
195: {
196:   PetscDeviceInitialize(PETSC_DEVICE_HIP);
197:   PetscLayoutSetUp(V->map);
198:   VecHIPAllocateCheck(V);
199:   VecCreate_SeqHIP_Private(V, ((Vec_HIP *)V->spptr)->GPUarray_allocated);
200:   VecSet_SeqHIP(V, 0.0);
201:   return 0;
202: }

204: /*@C
205:    VecCreateSeqHIPWithArray - Creates a HIP sequential array-style vector,
206:    where the user provides the array space to store the vector values. The array
207:    provided must be a GPU array.

209:    Collective

211:    Input Parameters:
212: +  comm - the communicator, should be PETSC_COMM_SELF
213: .  bs - the block size
214: .  n - the vector length
215: -  array - GPU memory where the vector elements are to be stored.

217:    Output Parameter:
218: .  V - the vector

220:    Notes:
221:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
222:    same type as an existing vector.

224:    If the user-provided array is NULL, then VecHIPPlaceArray() can be used
225:    at a later stage to SET the array for storing the vector values.

227:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
228:    The user should not free the array until the vector is destroyed.

230:    Level: intermediate

232: .seealso: `VecCreateMPIHIPWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
233:           `VecCreateGhost()`, `VecCreateSeq()`, `VecHIPPlaceArray()`, `VecCreateSeqWithArray()`,
234:           `VecCreateMPIWithArray()`
235: @*/
236: PetscErrorCode VecCreateSeqHIPWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar array[], Vec *V)
237: {
238:   PetscDeviceInitialize(PETSC_DEVICE_HIP);
239:   VecCreate(comm, V);
240:   VecSetSizes(*V, n, n);
241:   VecSetBlockSize(*V, bs);
242:   VecCreate_SeqHIP_Private(*V, array);
243:   return 0;
244: }

246: /*@C
247:    VecCreateSeqHIPWithArrays - Creates a HIP sequential array-style vector,
248:    where the user provides the array space to store the vector values.

250:    Collective

252:    Input Parameters:
253: +  comm - the communicator, should be PETSC_COMM_SELF
254: .  bs - the block size
255: .  n - the vector length
256: -  cpuarray - CPU memory where the vector elements are to be stored.
257: -  gpuarray - GPU memory where the vector elements are to be stored.

259:    Output Parameter:
260: .  V - the vector

262:    Notes:
263:    If both cpuarray and gpuarray are provided, the caller must ensure that
264:    the provided arrays have identical values.

266:    PETSc does NOT free the provided arrays when the vector is destroyed via
267:    VecDestroy(). The user should not free the array until the vector is
268:    destroyed.

270:    Level: intermediate

272: .seealso: `VecCreateMPIHIPWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
273:           `VecHIPPlaceArray()`, `VecCreateSeqHIPWithArray()`,
274:           `VecHIPAllocateCheckHost()`
275: @*/
276: PetscErrorCode VecCreateSeqHIPWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *V)
277: {
278:   // set V's gpuarray to be gpuarray, do not allocate memory on host yet.
279:   VecCreateSeqHIPWithArray(comm, bs, n, gpuarray, V);

281:   if (cpuarray && gpuarray) {
282:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
283:     s->array          = (PetscScalar *)cpuarray;
284:     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
285:   } else if (cpuarray) {
286:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
287:     s->array          = (PetscScalar *)cpuarray;
288:     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
289:   } else if (gpuarray) {
290:     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
291:   } else {
292:     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
293:   }

295:   return 0;
296: }

298: PetscErrorCode VecGetArray_SeqHIP(Vec v, PetscScalar **a)
299: {
300:   VecHIPCopyFromGPU(v);
301:   *a = *((PetscScalar **)v->data);
302:   return 0;
303: }

305: PetscErrorCode VecRestoreArray_SeqHIP(Vec v, PetscScalar **a)
306: {
307:   v->offloadmask = PETSC_OFFLOAD_CPU;
308:   return 0;
309: }

311: PetscErrorCode VecGetArrayWrite_SeqHIP(Vec v, PetscScalar **a)
312: {
313:   VecHIPAllocateCheckHost(v);
314:   *a = *((PetscScalar **)v->data);
315:   return 0;
316: }

318: PetscErrorCode VecGetArrayAndMemType_SeqHIP(Vec v, PetscScalar **a, PetscMemType *mtype)
319: {
320:   VecHIPCopyToGPU(v);
321:   *a = ((Vec_HIP *)v->spptr)->GPUarray;
322:   if (mtype) *mtype = PETSC_MEMTYPE_HIP;
323:   return 0;
324: }

326: PetscErrorCode VecRestoreArrayAndMemType_SeqHIP(Vec v, PetscScalar **a)
327: {
328:   v->offloadmask = PETSC_OFFLOAD_GPU;
329:   return 0;
330: }

332: PetscErrorCode VecGetArrayWriteAndMemType_SeqHIP(Vec v, PetscScalar **a, PetscMemType *mtype)
333: {
334:   /* Allocate memory (not zeroed) on device if not yet, but no need to sync data from host to device */
335:   VecHIPAllocateCheck(v);
336:   *a = ((Vec_HIP *)v->spptr)->GPUarray;
337:   if (mtype) *mtype = PETSC_MEMTYPE_HIP;
338:   return 0;
339: }

341: PetscErrorCode VecBindToCPU_SeqHIP(Vec V, PetscBool bind)
342: {
343:   V->boundtocpu = bind;
344:   if (bind) {
345:     VecHIPCopyFromGPU(V);
346:     V->offloadmask                  = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
347:     V->ops->dot                     = VecDot_Seq;
348:     V->ops->norm                    = VecNorm_Seq;
349:     V->ops->tdot                    = VecTDot_Seq;
350:     V->ops->scale                   = VecScale_Seq;
351:     V->ops->copy                    = VecCopy_Seq;
352:     V->ops->set                     = VecSet_Seq;
353:     V->ops->swap                    = VecSwap_Seq;
354:     V->ops->axpy                    = VecAXPY_Seq;
355:     V->ops->axpby                   = VecAXPBY_Seq;
356:     V->ops->axpbypcz                = VecAXPBYPCZ_Seq;
357:     V->ops->pointwisemult           = VecPointwiseMult_Seq;
358:     V->ops->pointwisedivide         = VecPointwiseDivide_Seq;
359:     V->ops->setrandom               = VecSetRandom_Seq;
360:     V->ops->dot_local               = VecDot_Seq;
361:     V->ops->tdot_local              = VecTDot_Seq;
362:     V->ops->norm_local              = VecNorm_Seq;
363:     V->ops->mdot_local              = VecMDot_Seq;
364:     V->ops->mtdot_local             = VecMTDot_Seq;
365:     V->ops->maxpy                   = VecMAXPY_Seq;
366:     V->ops->mdot                    = VecMDot_Seq;
367:     V->ops->mtdot                   = VecMTDot_Seq;
368:     V->ops->aypx                    = VecAYPX_Seq;
369:     V->ops->waxpy                   = VecWAXPY_Seq;
370:     V->ops->dotnorm2                = NULL;
371:     V->ops->placearray              = VecPlaceArray_Seq;
372:     V->ops->replacearray            = VecReplaceArray_SeqHIP;
373:     V->ops->resetarray              = VecResetArray_Seq;
374:     V->ops->duplicate               = VecDuplicate_Seq;
375:     V->ops->conjugate               = VecConjugate_Seq;
376:     V->ops->getlocalvector          = NULL;
377:     V->ops->restorelocalvector      = NULL;
378:     V->ops->getlocalvectorread      = NULL;
379:     V->ops->restorelocalvectorread  = NULL;
380:     V->ops->getarraywrite           = NULL;
381:     V->ops->getarrayandmemtype      = NULL;
382:     V->ops->restorearrayandmemtype  = NULL;
383:     V->ops->getarraywriteandmemtype = NULL;
384:     V->ops->max                     = VecMax_Seq;
385:     V->ops->min                     = VecMin_Seq;
386:     V->ops->reciprocal              = VecReciprocal_Default;
387:     V->ops->sum                     = NULL;
388:     V->ops->shift                   = NULL;
389:   } else {
390:     V->ops->dot                     = VecDot_SeqHIP;
391:     V->ops->norm                    = VecNorm_SeqHIP;
392:     V->ops->tdot                    = VecTDot_SeqHIP;
393:     V->ops->scale                   = VecScale_SeqHIP;
394:     V->ops->copy                    = VecCopy_SeqHIP;
395:     V->ops->set                     = VecSet_SeqHIP;
396:     V->ops->swap                    = VecSwap_SeqHIP;
397:     V->ops->axpy                    = VecAXPY_SeqHIP;
398:     V->ops->axpby                   = VecAXPBY_SeqHIP;
399:     V->ops->axpbypcz                = VecAXPBYPCZ_SeqHIP;
400:     V->ops->pointwisemult           = VecPointwiseMult_SeqHIP;
401:     V->ops->pointwisedivide         = VecPointwiseDivide_SeqHIP;
402:     V->ops->setrandom               = VecSetRandom_SeqHIP;
403:     V->ops->dot_local               = VecDot_SeqHIP;
404:     V->ops->tdot_local              = VecTDot_SeqHIP;
405:     V->ops->norm_local              = VecNorm_SeqHIP;
406:     V->ops->mdot_local              = VecMDot_SeqHIP;
407:     V->ops->maxpy                   = VecMAXPY_SeqHIP;
408:     V->ops->mdot                    = VecMDot_SeqHIP;
409:     V->ops->aypx                    = VecAYPX_SeqHIP;
410:     V->ops->waxpy                   = VecWAXPY_SeqHIP;
411:     V->ops->dotnorm2                = VecDotNorm2_SeqHIP;
412:     V->ops->placearray              = VecPlaceArray_SeqHIP;
413:     V->ops->replacearray            = VecReplaceArray_SeqHIP;
414:     V->ops->resetarray              = VecResetArray_SeqHIP;
415:     V->ops->destroy                 = VecDestroy_SeqHIP;
416:     V->ops->duplicate               = VecDuplicate_SeqHIP;
417:     V->ops->conjugate               = VecConjugate_SeqHIP;
418:     V->ops->getlocalvector          = VecGetLocalVector_SeqHIP;
419:     V->ops->restorelocalvector      = VecRestoreLocalVector_SeqHIP;
420:     V->ops->getlocalvectorread      = VecGetLocalVectorRead_SeqHIP;
421:     V->ops->restorelocalvectorread  = VecRestoreLocalVectorRead_SeqHIP;
422:     V->ops->getarraywrite           = VecGetArrayWrite_SeqHIP;
423:     V->ops->getarray                = VecGetArray_SeqHIP;
424:     V->ops->restorearray            = VecRestoreArray_SeqHIP;
425:     V->ops->getarrayandmemtype      = VecGetArrayAndMemType_SeqHIP;
426:     V->ops->restorearrayandmemtype  = VecRestoreArrayAndMemType_SeqHIP;
427:     V->ops->getarraywriteandmemtype = VecGetArrayWriteAndMemType_SeqHIP;
428:     V->ops->max                     = VecMax_SeqHIP;
429:     V->ops->min                     = VecMin_SeqHIP;
430:     V->ops->reciprocal              = VecReciprocal_SeqHIP;
431:     V->ops->sum                     = VecSum_SeqHIP;
432:     V->ops->shift                   = VecShift_SeqHIP;
433:   }
434:   return 0;
435: }

437: PetscErrorCode VecCreate_SeqHIP_Private(Vec V, const PetscScalar *array)
438: {
439:   Vec_HIP    *vechip;
440:   PetscMPIInt size;
441:   PetscBool   option_set;

443:   MPI_Comm_size(PetscObjectComm((PetscObject)V), &size);
445:   VecCreate_Seq_Private(V, 0);
446:   PetscObjectChangeTypeName((PetscObject)V, VECSEQHIP);
447:   VecBindToCPU_SeqHIP(V, PETSC_FALSE);
448:   V->ops->bindtocpu = VecBindToCPU_SeqHIP;

450:   /* Later, functions check for the Vec_HIP structure existence, so do not create it without array */
451:   if (array) {
452:     if (!V->spptr) {
453:       PetscReal pinned_memory_min;

455:       PetscCalloc(sizeof(Vec_HIP), &V->spptr);
456:       vechip         = (Vec_HIP *)V->spptr;
457:       V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

459:       pinned_memory_min = 0;
460:       /* Need to parse command line for minimum size to use for pinned memory allocations on host here.
461:          Note: This same code duplicated in VecHIPAllocateCheck() and VecCreate_MPIHIP_Private(). Is there a good way to avoid this? */
462:       PetscOptionsBegin(PetscObjectComm((PetscObject)V), ((PetscObject)V)->prefix, "VECHIP Options", "Vec");
463:       PetscOptionsReal("-vec_pinned_memory_min", "Minimum size (in bytes) for an allocation to use pinned memory on host", "VecSetPinnedMemoryMin", pinned_memory_min, &pinned_memory_min, &option_set);
464:       if (option_set) V->minimum_bytes_pinned_memory = pinned_memory_min;
465:       PetscOptionsEnd();
466:     }
467:     vechip           = (Vec_HIP *)V->spptr;
468:     vechip->GPUarray = (PetscScalar *)array;
469:     V->offloadmask   = PETSC_OFFLOAD_GPU;
470:   }
471:   return 0;
472: }