Actual source code: deviceimpl.h
1: #ifndef PETSCDEVICEIMPL_H
2: #define PETSCDEVICEIMPL_H
4: #include <petscdevice.h>
5: #include <petsc/private/petscimpl.h>
7: /* logging support */
8: PETSC_INTERN PetscLogEvent CUBLAS_HANDLE_CREATE;
9: PETSC_INTERN PetscLogEvent CUSOLVER_HANDLE_CREATE;
10: PETSC_INTERN PetscLogEvent HIPSOLVER_HANDLE_CREATE;
11: PETSC_INTERN PetscLogEvent HIPBLAS_HANDLE_CREATE;
13: PETSC_INTERN PetscLogEvent DCONTEXT_Create;
14: PETSC_INTERN PetscLogEvent DCONTEXT_Destroy;
15: PETSC_INTERN PetscLogEvent DCONTEXT_ChangeStream;
16: PETSC_INTERN PetscLogEvent DCONTEXT_SetDevice;
17: PETSC_INTERN PetscLogEvent DCONTEXT_SetUp;
18: PETSC_INTERN PetscLogEvent DCONTEXT_Duplicate;
19: PETSC_INTERN PetscLogEvent DCONTEXT_QueryIdle;
20: PETSC_INTERN PetscLogEvent DCONTEXT_WaitForCtx;
21: PETSC_INTERN PetscLogEvent DCONTEXT_Fork;
22: PETSC_INTERN PetscLogEvent DCONTEXT_Join;
23: PETSC_INTERN PetscLogEvent DCONTEXT_Sync;
24: PETSC_INTERN PetscLogEvent DCONTEXT_Mark;
26: /* Type-cast macros for some additional type-safety in C++ land: when this header is compiled
 * as C++ each macro expands to a static_cast<> to the named type, otherwise it expands to a
 * fully parenthesised C-style cast. The argument is forwarded through __VA_ARGS__. */
27: #if defined(__cplusplus)
28: #define PetscStreamTypeCast(...) static_cast<PetscStreamType>(__VA_ARGS__)
29: #define PetscDeviceTypeCast(...) static_cast<PetscDeviceType>(__VA_ARGS__)
30: #define PetscDeviceInitTypeCast(...) static_cast<PetscDeviceInitType>(__VA_ARGS__)
31: #else
32: #define PetscStreamTypeCast(...) ((PetscStreamType)(__VA_ARGS__))
33: #define PetscDeviceTypeCast(...) ((PetscDeviceType)(__VA_ARGS__))
34: #define PetscDeviceInitTypeCast(...) ((PetscDeviceInitType)(__VA_ARGS__))
35: #endif
37: #if defined(PETSC_CLANG_STATIC_ANALYZER)
38: template <typename T>
40: template <typename T, typename U>
42: template <typename T>
44: template <typename T>
46: template <typename T, typename U>
48: template <typename T>
50: template <typename T>
52: template <typename T, typename U>
54: #elif PetscDefined(HAVE_CXX) && (PetscDefined(USE_DEBUG) || PetscDefined(DEVICE_KEEP_ERROR_CHECKING_MACROS))
56: do { \
57: PetscDeviceType pvdt_dtype_ = PetscDeviceTypeCast(dtype); \
58: int pvdt_argno_ = (int)(argno); \
60: if (PetscUnlikely(!PetscDeviceConfiguredFor_Internal(pvdt_dtype_))) { \
62: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, \
63: "Not configured for PetscDeviceType '%s': Argument #%d;" \
64: " run configure --help %s for available options", \
65: PetscDeviceTypes[pvdt_dtype_], pvdt_argno_, PetscDeviceTypes[pvdt_dtype_]); \
66: } \
67: } while (0)
70: do { \
71: PetscDeviceType pccdt_dtype1_ = PetscDeviceTypeCast(dtype1); \
72: PetscDeviceType pccdt_dtype2_ = PetscDeviceTypeCast(dtype2); \
76: } while (0)
79: do { \
80: PetscDevice pvd_dev_ = dev; \
81: int pvd_argno_ = (int)(argno); \
86: } while (0)
89: do { \
90: PetscDeviceAttribute pvda_attr_ = (dattr); \
91: int pvda_argno_ = (int)(argno); \
94: } while (0)
96: /*
97: for now just checks strict equality, but this can be changed as some devices (i.e. kokkos and
98: any cupm should be compatible once implemented)
99: */
101: do { \
102: PetscDevice pccd_dev1_ = (dev1), pccd_dev2_ = (dev2); \
103: int pccd_argno1_ = (int)(argno1), pccd_argno2_ = (int)(argno2); \
107: } while (0)
110: do { \
111: PetscStreamType pvst_stype_ = PetscStreamTypeCast(stype); \
112: int pvst_argno_ = (int)(argno); \
115: } while (0)
118: do { \
119: PetscDeviceContext pvdc_dctx_ = dctx; \
120: int pvdc_argno_ = (int)(argno); \
123: if (pvdc_dctx_->device) { \
125: } else { \
127: "Invalid PetscDeviceContext: Argument #%d; " \
128: "PetscDeviceContext is setup but has no PetscDevice", \
129: pvdc_argno_); \
130: } \
133: pvdc_dctx_->numChildren, pvdc_dctx_->maxNumChildren); \
134: } while (0)
137: do { \
138: PetscDeviceContext pccdc_dctx1_ = (dctx1), pccdc_dctx2_ = (dctx2); \
139: int pccdc_argno1_ = (int)(argno1), pccdc_argno2_ = (int)(argno2); \
143: } while (0)
144: #else /* PetscDefined(USE_DEBUG) */
153: #endif /* PetscDefined(USE_DEBUG) */
155: /* if someone is ready to rock with more than 128 GPUs on hand then we're in real trouble */
156: #define PETSC_DEVICE_MAX_DEVICES 128
158: /*
159: the configure-time default device type, used as the initial value of
160: PETSC_DEVICE_DEFAULT() as well as what it is restored to during PetscFinalize()
161: */
/* The first configured backend wins, tested in the order HIP, CUDA, SYCL; PETSC_DEVICE_HOST is
 * the fallback when none of them is configured. */
162: #if PetscDefined(HAVE_HIP)
163: #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_HIP
164: #elif PetscDefined(HAVE_CUDA)
165: #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_CUDA
166: #elif PetscDefined(HAVE_SYCL)
167: #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_SYCL
168: #else
169: #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_HOST
170: #endif
/* the default device type for a PetscDeviceContext is simply the hardware default above */
172: #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE_TYPE PETSC_DEVICE_HARDWARE_DEFAULT_TYPE
173: // REMOVE ME (change)
/* the default stream type for a PetscDeviceContext */
174: #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE PETSC_STREAM_GLOBAL_BLOCKING
/* Table of function pointers for a PetscDevice; struct _n_PetscDevice embeds one such table.
 * Every entry returns a PetscErrorCode. */
176: typedef struct _DeviceOps *DeviceOps;
177: struct _DeviceOps {
178: /* the creation routine for the corresponding PetscDeviceContext, this is NOT intended
179: * to be called by the PetscDevice itself */
180: PetscErrorCode (*createcontext)(PetscDeviceContext);
181: PetscErrorCode (*configure)(PetscDevice); /* NOTE(review): presumably device-specific configuration -- confirm against the implementations */
182: PetscErrorCode (*view)(PetscDevice, PetscViewer); /* takes the device and the PetscViewer to use */
183: PetscErrorCode (*getattribute)(PetscDevice, PetscDeviceAttribute, void *); /* result goes through the untyped pointer; presumably backs PetscDeviceGetAttribute() -- confirm */
184: };
/* Per-device bookkeeping. The helpers PetscDeviceReference_Internal() and
 * PetscDeviceDereference_Internal() in this header access refcnt through a PetscDevice, so
 * PetscDevice is presumably a pointer to this struct (its typedef is not in this header). */
186: struct _n_PetscDevice {
187: struct _DeviceOps ops[1]; /* embedded ops table; a one-element array so that it decays to a pointer and reads as ops->member */
188: void *data; /* placeholder */
189: PetscInt refcnt; /* reference count for the device */
190: PetscInt id; /* unique id per created PetscDevice */
191: PetscInt deviceId; /* the id of the underlying device, i.e. the return of
192: * cudaGetDevice() for example */
193: PetscDeviceType type; /* type of device */
194: };
// An event object: remembers which PetscDeviceContext (by id and state) recorded it last and
// carries an opaque, type-specific handle together with the routine used to destroy it.
196: typedef struct _n_PetscEvent *PetscEvent;
197: struct _n_PetscEvent {
198: PetscDeviceType dtype; // this cannot change for the lifetime of the event
199: PetscObjectId dctx_id; // id of last dctx to record this event
200: PetscObjectState dctx_state; // state of last dctx to record this event
201: void *data; // event handle
202: PetscErrorCode (*destroy)(PetscEvent); // destructor callback, receives the event itself; presumably releases data -- confirm
203: };
// Table of type-specific operations of a PetscDeviceContext. It is the ops type given to
// PETSCHEADER() in struct _p_PetscDeviceContext and is reached as dctx->ops-><name>.
// Entries tagged "optional" may be absent; NOTE(review): untagged entries are presumably
// mandatory for every implementation -- confirm.
205: typedef struct _DeviceContextOps *DeviceContextOps;
206: struct _DeviceContextOps {
207: PetscErrorCode (*destroy)(PetscDeviceContext);
208: PetscErrorCode (*changestreamtype)(PetscDeviceContext, PetscStreamType);
209: PetscErrorCode (*setup)(PetscDeviceContext);
210: PetscErrorCode (*query)(PetscDeviceContext, PetscBool *);
211: PetscErrorCode (*waitforcontext)(PetscDeviceContext, PetscDeviceContext);
212: PetscErrorCode (*synchronize)(PetscDeviceContext);
213: PetscErrorCode (*getblashandle)(PetscDeviceContext, void *); // used by PetscDeviceContextGetBLASHandle_Internal()
214: PetscErrorCode (*getsolverhandle)(PetscDeviceContext, void *); // used by PetscDeviceContextGetSOLVERHandle_Internal()
215: PetscErrorCode (*getstreamhandle)(PetscDeviceContext, void *); // used by PetscDeviceContextGetStreamHandle_Internal()
216: PetscErrorCode (*begintimer)(PetscDeviceContext); // invoked by PetscDeviceContextBeginTimer_Internal()
217: PetscErrorCode (*endtimer)(PetscDeviceContext, PetscLogDouble *); // invoked by PetscDeviceContextEndTimer_Internal()
218: PetscErrorCode (*memalloc)(PetscDeviceContext, PetscBool, PetscMemType, size_t, size_t, void **); // optional
219: PetscErrorCode (*memfree)(PetscDeviceContext, PetscMemType, void **); // optional
220: PetscErrorCode (*memcopy)(PetscDeviceContext, void *PETSC_RESTRICT, const void *PETSC_RESTRICT, size_t, PetscDeviceCopyMode); // optional
221: PetscErrorCode (*memset)(PetscDeviceContext, PetscMemType, void *, PetscInt, size_t); // optional
222: PetscErrorCode (*createevent)(PetscDeviceContext, PetscEvent); // optional
223: PetscErrorCode (*recordevent)(PetscDeviceContext, PetscEvent); // optional
224: PetscErrorCode (*waitforevent)(PetscDeviceContext, PetscEvent); // optional
225: };
/* Object layout of a PetscDeviceContext: the common PETSc object header followed by the
 * device-context specific state. */
227: struct _p_PetscDeviceContext {
228: PETSCHEADER(struct _DeviceContextOps); /* object header; the ops table is reached as dctx->ops */
229: PetscDevice device; /* the device this context stems from */
230: void *data; /* solver contexts, event, stream */
231: PetscObjectId *childIDs; /* array containing ids of contexts currently forked from this one */
232: PetscInt numChildren; /* how many children does this context expect to destroy */
233: PetscInt maxNumChildren; /* how many children can this context have room for without realloc'ing */
234: PetscStreamType streamType; /* how should this contexts stream behave around other streams? */
235: PetscBool setup; /* NOTE(review): presumably PETSC_TRUE once the context has been set up -- confirm */
236: PetscBool usersetdevice; /* NOTE(review): presumably records that the device was chosen explicitly by the user -- confirm */
237: };
239: // ===================================================================================
240: // PetscDevice Internal Functions
241: // ===================================================================================
242: #if PetscDefined(HAVE_CXX)
243: PETSC_INTERN PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm);
244: PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType, PetscDevice *);
246: static inline PetscErrorCode PetscDeviceReference_Internal(PetscDevice device)
247: {
248: ++(device->refcnt);
249: return 0;
250: }
252: static inline PetscErrorCode PetscDeviceDereference_Internal(PetscDevice device)
253: {
254: --(device->refcnt);
255: PetscAssert(device->refcnt >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "PetscDevice has negative reference count %" PetscInt_FMT, device->refcnt);
256: return 0;
257: }
258: #else /* PETSC_HAVE_CXX for PetscDevice Internal Functions */
/* Stubs used when PetscDefined(HAVE_CXX) is false: each one expands to the constant 0 and
 * evaluates none of its arguments. */
259: #define PetscDeviceInitializeFromOptions_Internal(comm) 0
260: #define PetscDeviceGetDefaultForType_Internal(Type, device) 0
261: #define PetscDeviceReference_Internal(device) 0
262: #define PetscDeviceDereference_Internal(device) 0
263: #endif /* PETSC_HAVE_CXX for PetscDevice Internal Functions */
/*
  PetscDeviceCheckDeviceCount_Internal - Assert that a detected device count is below
  PETSC_DEVICE_MAX_DEVICES

  Input Parameter:
. count - the number of devices that was detected

  Notes:
  Reports PETSC_ERR_ARG_SIZ through PetscAssert() when the check fails, otherwise returns 0.

  NOTE(review): the comparison is strict, so count == PETSC_DEVICE_MAX_DEVICES is rejected even
  though the message speaks of a count "larger than" the maximum -- confirm which of the two is
  intended before relying on the boundary.
*/
265: static inline PetscErrorCode PetscDeviceCheckDeviceCount_Internal(PetscInt count)
266: {
267: PetscAssert(count < PETSC_DEVICE_MAX_DEVICES, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Detected %" PetscInt_FMT " devices, which is larger than maximum supported number of devices %d", count, PETSC_DEVICE_MAX_DEVICES);
268: return 0;
269: }
271: /* Convenience form of PetscDeviceGetDefaultForType_Internal(): forwards to it, passing
272: * the value of PETSC_DEVICE_DEFAULT() as the PetscDeviceType */
273: #define PetscDeviceGetDefault_Internal(device) PetscDeviceGetDefaultForType_Internal(PETSC_DEVICE_DEFAULT(), device)
/*
  PetscDeviceConfiguredFor_Internal - Tell whether this build supports a given PetscDeviceType

  Input Parameter:
. type - the PetscDeviceType to query

  Notes:
  PETSC_DEVICE_HOST always yields PETSC_TRUE and PETSC_DEVICE_MAX always yields PETSC_FALSE.
  For CUDA, HIP and SYCL the answer is the value of the matching PetscDefined(HAVE_<X>)
  converted to PetscBool. The switch deliberately has no default label (see the comment at
  its end); the statements after it are only reached for a value that matches none of the
  listed enumerators.
*/
275: static inline PETSC_CONSTEXPR_14 PetscBool PetscDeviceConfiguredFor_Internal(PetscDeviceType type)
276: {
277: switch (type) {
278: case PETSC_DEVICE_HOST:
279: return PETSC_TRUE;
280: /* casts are needed in C++ */
281: case PETSC_DEVICE_CUDA:
282: return (PetscBool)PetscDefined(HAVE_CUDA);
283: case PETSC_DEVICE_HIP:
284: return (PetscBool)PetscDefined(HAVE_HIP);
285: case PETSC_DEVICE_SYCL:
286: return (PetscBool)PetscDefined(HAVE_SYCL);
287: case PETSC_DEVICE_MAX:
288: return PETSC_FALSE;
289: /* Do not add default case! Will make compiler warn on new additions to PetscDeviceType! */
290: }
/* NOTE(review): PetscUnreachable() presumably marks this point as unreachable for the
 * optimizer; the return after it keeps every control path returning a value */
291: PetscUnreachable();
292: return PETSC_FALSE;
293: }
295: // ===================================================================================
296: // PetscDeviceContext Internal Functions
297: // ===================================================================================
298: #if PetscDefined(HAVE_CXX)
299: PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *);
301: static inline PetscErrorCode PetscDeviceContextGetHandle_Private(PetscDeviceContext dctx, void *handle, PetscErrorCode (*gethandle_op)(PetscDeviceContext, void *))
302: {
305: (*gethandle_op)(dctx, handle);
306: return 0;
307: }
309: static inline PetscErrorCode PetscDeviceContextGetBLASHandle_Internal(PetscDeviceContext dctx, void *handle)
310: {
311: /* we do error checking here as this routine is an entry-point */
313: PetscDeviceContextGetHandle_Private(dctx, handle, dctx->ops->getblashandle);
314: return 0;
315: }
317: static inline PetscErrorCode PetscDeviceContextGetSOLVERHandle_Internal(PetscDeviceContext dctx, void *handle)
318: {
319: /* we do error checking here as this routine is an entry-point */
321: PetscDeviceContextGetHandle_Private(dctx, handle, dctx->ops->getsolverhandle);
322: return 0;
323: }
325: static inline PetscErrorCode PetscDeviceContextGetStreamHandle_Internal(PetscDeviceContext dctx, void *handle)
326: {
327: /* we do error checking here as this routine is an entry-point */
329: PetscDeviceContextGetHandle_Private(dctx, handle, dctx->ops->getstreamhandle);
330: return 0;
331: }
/*
  PetscDeviceContextBeginTimer_Internal - Invoke the begintimer operation of a
  PetscDeviceContext

  Input Parameter:
. dctx - the PetscDeviceContext

  Notes:
  NOTE(review): PetscUseTypeMethod() presumably calls dctx->ops->begintimer(dctx), propagates
  its error code, and errors out when the operation is not provided -- confirm against the
  macro definition. Returns 0 when the macro falls through.
*/
333: static inline PetscErrorCode PetscDeviceContextBeginTimer_Internal(PetscDeviceContext dctx)
334: {
335: /* we do error checking here as this routine is an entry-point */
337: PetscUseTypeMethod(dctx, begintimer);
338: return 0;
339: }
/*
  PetscDeviceContextEndTimer_Internal - Invoke the endtimer operation of a PetscDeviceContext

  Input Parameter:
. dctx - the PetscDeviceContext

  Output Parameter:
. elapsed - forwarded to the endtimer operation; presumably receives the elapsed time since
            the matching begin-timer call (units not visible here -- confirm)

  Notes:
  NOTE(review): PetscUseTypeMethod() presumably calls dctx->ops->endtimer(dctx, elapsed),
  propagates its error code, and errors out when the operation is not provided -- confirm
  against the macro definition. Returns 0 when the macro falls through.
*/
341: static inline PetscErrorCode PetscDeviceContextEndTimer_Internal(PetscDeviceContext dctx, PetscLogDouble *elapsed)
342: {
343: /* we do error checking here as this routine is an entry-point */
346: PetscUseTypeMethod(dctx, endtimer, elapsed);
347: return 0;
348: }
349: #else /* PETSC_HAVE_CXX for PetscDeviceContext Internal Functions */
/* Stubs used when PetscDefined(HAVE_CXX) is false. The getters store PETSC_NULLPTR through
 * their output argument and evaluate to 0 (so the argument must be a dereferenceable, typed
 * pointer); the timer stubs evaluate to 0 without touching any argument. */
350: #define PetscDeviceContextGetNullContext_Internal(dctx) (*(dctx) = PETSC_NULLPTR, 0)
351: #define PetscDeviceContextGetBLASHandle_Internal(dctx, handle) (*(handle) = PETSC_NULLPTR, 0)
352: #define PetscDeviceContextGetSOLVERHandle_Internal(dctx, handle) (*(handle) = PETSC_NULLPTR, 0)
353: #define PetscDeviceContextGetStreamHandle_Internal(dctx, handle) (*(handle) = PETSC_NULLPTR, 0)
354: #define PetscDeviceContextBeginTimer_Internal(dctx) 0
355: #define PetscDeviceContextEndTimer_Internal(dctx, elapsed) 0
356: #endif /* PETSC_HAVE_CXX for PetscDeviceContext Internal Functions */
358: /* note, only does assertion checking in debug mode */
359: static inline PetscErrorCode PetscDeviceContextGetCurrentContextAssertType_Internal(PetscDeviceContext *dctx, PetscDeviceType type)
360: {
361: PetscDeviceContextGetCurrentContext(dctx);
362: if (PetscDefined(USE_DEBUG)) {
363: PetscDeviceType dtype;
366: PetscDeviceContextGetDeviceType(*dctx, &dtype);
368: }
369: return 0;
370: }
372: static inline PetscErrorCode PetscDeviceContextGetOptionalNullContext_Internal(PetscDeviceContext *dctx)
373: {
375: if (!*dctx) PetscDeviceContextGetNullContext_Internal(dctx);
377: return 0;
378: }
380: /* Experimental API -- it will eventually become public */
381: #if PetscDefined(HAVE_CXX)
382: PETSC_EXTERN PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT, PetscMemType, size_t);
383: PETSC_EXTERN PetscErrorCode PetscDeviceGetAttribute(PetscDevice, PetscDeviceAttribute, void *);
384: PETSC_EXTERN PetscErrorCode PetscDeviceContextMarkIntentFromID(PetscDeviceContext, PetscObjectId, PetscMemoryAccessMode, const char name[]);
385: #if defined(__cplusplus)
386: namespace
387: {
389: PETSC_NODISCARD inline PetscErrorCode PetscDeviceContextMarkIntentFromID(PetscDeviceContext dctx, PetscObject obj, PetscMemoryAccessMode mode, const char name[])
390: {
391: PetscDeviceContextMarkIntentFromID(dctx, obj->id, mode, name);
392: return 0;
393: }
395: } // anonymous namespace
396: #endif // __cplusplus
397: #else
/* Stubs for the experimental API used when PetscDefined(HAVE_CXX) is false. The macro
 * parameter names only mirror the argument types of the real functions. All three evaluate
 * to 0; PetscDeviceGetAttribute() additionally stores 0 through its last argument
 * reinterpreted as an int pointer, so that argument must point to at least an int. */
398: #define PetscDeviceRegisterMemory(void_ptr, PetscMemType, size) 0
399: #define PetscDeviceGetAttribute(PetscDevice, PetscDeviceAttribute, void_star) ((*((int *)(void_star)) = 0), 0)
400: #define PetscDeviceContextMarkIntentFromID(PetscDeviceContext, PetscObjectId, PetscMemoryAccessMode, ptr) 0
401: #endif
403: PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_HOST(PetscDeviceContext);
404: #if PetscDefined(HAVE_CUDA)
405: PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_CUDA(PetscDeviceContext);
406: #endif
407: #if PetscDefined(HAVE_HIP)
408: PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_HIP(PetscDeviceContext);
409: #endif
410: #if PetscDefined(HAVE_SYCL)
411: PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext);
412: #endif
413: #endif /* PETSCDEVICEIMPL_H */