Actual source code: dcontext.cxx
1: #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2: #include <petsc/private/viewerimpl.h>
4: #include <petsc/private/cpp/object_pool.hpp>
5: #include <petsc/private/cpp/utility.hpp>
6: #include <petsc/private/cpp/array.hpp>
8: #include <vector>
9: #include <string> // std::to_string among other things
11: /* Define the allocator */
12: class PetscDeviceContextAllocator : public Petsc::AllocatorBase<PetscDeviceContext> {
13: public:
14: PETSC_CXX_COMPAT_DECL(PetscErrorCode create(PetscDeviceContext *dctx))
15: {
16: PetscHeaderCreate(*dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, PetscDeviceContextDestroy, PetscDeviceContextView);
17: PetscObjectCast(*dctx)->cpp = new CxxData();
18: reset(*dctx, false);
19: return 0;
20: }
22: PETSC_CXX_COMPAT_DECL(PetscErrorCode destroy(PetscDeviceContext dctx))
23: {
24: PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
25: PetscTryTypeMethod(dctx, destroy);
26: PetscDeviceDestroy(&dctx->device);
27: PetscFree(dctx->childIDs);
28: delete CxxDataCast(dctx);
29: PetscHeaderDestroy(&dctx);
30: return 0;
31: }
33: PETSC_CXX_COMPAT_DECL(PetscErrorCode reset(PetscDeviceContext dctx, bool zero = true))
34: {
35: if (zero) {
36: // reset the device if the user set it
37: if (auto &userset = dctx->usersetdevice) {
38: userset = PETSC_FALSE;
39: PetscTryTypeMethod(dctx, destroy);
40: PetscDeviceDestroy(&dctx->device);
41: PetscArrayzero(dctx->ops, 1);
42: dctx->data = nullptr;
43: }
44: PetscHeaderReset_Internal(PetscObjectCast(dctx));
45: dctx->numChildren = 0;
46: dctx->setup = PETSC_FALSE;
47: // don't deallocate the child array, rather just zero it out
48: PetscArrayzero(dctx->childIDs, dctx->maxNumChildren);
49: CxxDataCast(dctx)->clear();
50: }
51: dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
52: return 0;
53: }
54: };
56: static Petsc::ObjectPool<PetscDeviceContext, PetscDeviceContextAllocator> contextPool;
58: /*@C
59: PetscDeviceContextCreate - Creates a `PetscDeviceContext`
61: Not Collective
63: Output Parameter:
64: . dctx - The `PetscDeviceContext`
66: Note:
67: Unlike almost every other PETSc class it is advised that most users use
68: `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of
69: different types are incompatible with one another; using `PetscDeviceContextDuplicate()`
70: ensures compatible types.
72: DAG representation:
73: .vb
74: time ->
76: |= CALL =| - dctx ->
77: .ve
79: Level: beginner
81: .N ASYNC_API
83: .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
84: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
85: `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()`
86: @*/
87: PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
88: {
90: PetscDeviceInitializePackage();
91: PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr);
92: contextPool.allocate(dctx);
93: PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr);
94: return 0;
95: }
97: /*@C
98: PetscDeviceContextDestroy - Frees a `PetscDeviceContext`
100: Not Collective
102: Input Parameters:
103: . dctx - The `PetscDeviceContext`
105: Notes:
106: No implicit synchronization occurs due to this routine, all resources are released completely
107: asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on
108: `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before
109: calling this routine.
111: DAG representation:
112: .vb
113: time ->
115: -> dctx - |= CALL =|
116: .ve
118: Developer Notes:
119: `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever
120: growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool,
121: this should perhaps be implemented.
123: Level: beginner
125: .N ASYNC_API
127: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
128: `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
129: @*/
130: PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
131: {
133: if (!*dctx) return 0;
134: PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr);
135: if (--(PetscObjectCast(*dctx)->refct) <= 0) {
136: PetscDeviceContextCheckNotOrphaned_Internal(*dctx);
137: // std::move of the expression of the trivially-copyable type 'PetscDeviceContext' (aka
138: // '_n_PetscDeviceContext *') has no effect; remove std::move() [performance-move-const-arg]
139: // can't remove std::move, since reclaim only takes r-value reference
140: contextPool.deallocate(std::move(*dctx)); // NOLINT (performance-move-const-arg)
141: }
142: PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr);
143: *dctx = nullptr;
144: return 0;
145: }
147: /*@C
148: PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a
149: `PetscDeviceContext`
151: Not Collective
153: Input Parameters:
154: + dctx - The `PetscDeviceContext`
155: - type - The `PetscStreamType`
157: Notes:
158: See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
159: types and their interactions. If the `PetscDeviceContext` was previously set up and stream
160: type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.
162: Level: beginner
164: .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`,
165: `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
166: @*/
167: PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
168: {
169: // do not use getoptionalnullcontext here since we do not want the user to change the stream
170: // type
173: // only need to do complex swapping if the object has already been setup
174: if (dctx->setup && (dctx->streamType != type)) {
175: dctx->setup = PETSC_FALSE;
176: PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr);
177: PetscUseTypeMethod(dctx, changestreamtype, type);
178: PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr);
179: }
180: dctx->streamType = type;
181: return 0;
182: }
184: /*@C
185: PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a
186: `PetscDeviceContext`
188: Not Collective
190: Input Parameter:
191: . dctx - The `PetscDeviceContext`
193: Output Parameter:
194: . type - The `PetscStreamType`
196: Notes:
197: See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
198: types and their interactions
200: Level: beginner
202: .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`,
203: `PetscDeviceContextSetFromOptions()`
204: @*/
205: PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
206: {
207: PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
209: *type = dctx->streamType;
210: return 0;
211: }
213: /*
214: Actual function to set the device.
216: 1. Repeatedly destroying and recreating internal data structures (like streams and events)
217: for recycled PetscDeviceContexts is not free. If done often, it does add up.
218: 2. The vast majority of PetscDeviceContexts are created by PETSc either as children or
219: default contexts. The default contexts *never* change type, and the children are extremely
220: unlikely to (chances are if you fork once, you will fork again very soon).
221: 3. The only time this calculus changes is if the user themselves sets the device type. In
222: this case we do not know what the user has changed, so must always wipe the slate clean.
224: Thus we need to keep track of whether the user explicitly sets the device context's device.
225: */
226: static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set)
227: {
228: // do not use getoptionalnullcontext here since we do not want the user to change its device
231: if (dctx->device && (dctx->device->id == device->id)) return 0;
232: PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr);
233: if (const auto destroy = dctx->ops->destroy) (*destroy)(dctx);
234: PetscDeviceDestroy(&dctx->device);
235: PetscMemzero(dctx->ops, sizeof(*dctx->ops));
236: (*device->ops->createcontext)(dctx);
237: PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr);
238: PetscDeviceReference_Internal(device);
239: dctx->device = device;
240: dctx->setup = PETSC_FALSE;
241: dctx->usersetdevice = user_set;
242: return 0;
243: }
245: PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type)
246: {
247: PetscDevice device;
249: PetscDeviceGetDefaultForType_Internal(type, &device);
250: PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE);
251: return 0;
252: }
254: /*@C
255: PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext`
257: Not Collective
259: Input Parameters:
260: + dctx - The `PetscDeviceContext`
261: - device - The `PetscDevice`
263: Notes:
264: This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must
265: also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly
266: necessary to set a context's device to enable usage; any created `PetscDeviceContext`s will
267: always come equipped with the "default" device.
269: This routine is a no-op if `device` is already attached to `dctx`.
271: This routine may (but is very unlikely to) initialize the backend device and may incur
272: synchronization.
274: Level: intermediate
276: .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`,
277: `PetscDeviceContextGetDeviceType()`
278: @*/
279: PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
280: {
281: PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE);
282: return 0;
283: }
285: /*@C
286: PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`
288: Not Collective
290: Input Parameter:
291: . dctx - the `PetscDeviceContext`
293: Output Parameter:
294: . device - The `PetscDevice`
296: Notes:
297: This is a borrowed reference, the user should not destroy `device`.
299: Level: intermediate
301: .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()`
302: @*/
303: PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
304: {
305: PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
307: PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id);
308: *device = dctx->device;
309: return 0;
310: }
312: /*@C
313: PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext`
315: Not Collective
317: Input Parameter:
318: . dctx - The `PetscDeviceContext`
320: Output Parameter:
321: . type - The `PetscDeviceType`
323: Notes:
324: This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` ->
325: `PetscDeviceGetType()`.
327: Level: beginner
329: .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice`
330: @*/
331: PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type)
332: {
333: PetscDevice device = nullptr;
335: PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
337: PetscDeviceContextGetDevice(dctx, &device);
338: PetscDeviceGetType(device, type);
339: return 0;
340: }
342: /*@C
343: PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use
345: Not Collective
347: Input Parameter:
348: . dctx - The `PetscDeviceContext`
350: Developer Notes:
351: This routine is usually the stage where a `PetscDeviceContext` acquires device-side data
352: structures such as streams, events, and (possibly) handles.
354: Level: beginner
356: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
357: `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
358: @*/
359: PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
360: {
361: PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
362: if (dctx->setup) return 0;
363: if (!dctx->device) {
364: const auto default_dtype = PETSC_DEVICE_DEFAULT();
366: PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype]);
367: PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype);
368: }
369: PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr);
370: PetscUseTypeMethod(dctx, setup);
371: PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr);
372: dctx->setup = PETSC_TRUE;
373: return 0;
374: }
376: static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup)
377: {
378: PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr);
379: PetscDeviceContextCreate(dctxdup);
380: PetscDeviceContextSetStreamType(*dctxdup, stype);
381: if (const auto device = dctx->device) PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice);
382: PetscDeviceContextSetUp(*dctxdup);
383: PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr);
384: return 0;
385: }
387: /*@C
388: PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object
390: Not Collective
392: Input Parameter:
393: . dctx - The `PetscDeviceContext` to duplicate
395: Output Parameter:
396: . dctxdup - The duplicated `PetscDeviceContext`
398: Notes:
399: This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as
400: another. Note however that `dctxdup` does not share any of the underlying data with `dctx`,
401: (including its current stream-state) they are completely separate objects.
403: There is no implied ordering between `dctx` or `dctxdup`.
405: DAG representation:
406: .vb
407: time ->
409: -> dctx - |= CALL =| - dctx ---->
410: - dctxdup ->
411: .ve
413: Level: beginner
415: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
416: `PetscDeviceContextSetStreamType()`
417: @*/
418: PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
419: {
420: auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
422: PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
424: PetscDeviceContextGetStreamType(dctx, &stype);
425: PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup);
426: return 0;
427: }
429: /*@C
430: PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle
432: Not Collective
434: Input Parameter:
435: . dctx - The `PetscDeviceContext`
437: Output Parameter:
438: . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work
440: Note:
441: This routine only refers to a single context and does NOT take any of its children into
442: account. That is, if `dctx` is idle but has dependents who do have work this routine still
443: returns `PETSC_TRUE`.
445: Level: intermediate
447: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
448: @*/
449: PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
450: {
451: PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
453: PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr);
454: PetscUseTypeMethod(dctx, query, idle);
455: PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr);
456: PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not");
457: return 0;
458: }
460: /*@C
461: PetscDeviceContextWaitForContext - Make one context wait for another context to finish
463: Not Collective
465: Input Parameters:
466: + dctxa - The `PetscDeviceContext` object that is waiting
467: - dctxb - The `PetscDeviceContext` object that is being waited on
469: Notes:
470: Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host
471: does not wait for the serialization to actually occur.
473: This routine uses only the state of `dctxb` at the moment this routine was called, so any
474: future work queued will not affect `dctxa`. It is safe to pass the same context to both
475: arguments (in which case this routine does nothing).
477: DAG representation:
478: .vb
479: time ->
481: -> dctxa ---/- |= CALL =| - dctxa ->
482: /
483: -> dctxb -/------------------------>
484: .ve
486: Level: beginner
488: .N ASYNC_API
490: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
491: @*/
492: PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
493: {
494: PetscObject aobj;
496: PetscDeviceContextGetOptionalNullContext_Internal(&dctxa);
497: PetscDeviceContextGetOptionalNullContext_Internal(&dctxb);
499: if (dctxa == dctxb) return 0;
500: aobj = PetscObjectCast(dctxa);
501: PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr);
502: PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
503: CxxDataCast(dctxa)->upstream[dctxb] = CxxDataParent(dctxb);
504: PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr);
505: PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", aobj->id, PetscObjectCast(dctxb)->id);
506: PetscObjectStateIncrease(aobj);
507: return 0;
508: }
510: /*@C
511: PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent
512: context with a prescribed `PetscStreamType`
514: Not Collective, Asynchronous
516: Input Parameters:
517: + dctx - The parent `PetscDeviceContext`
518: . stype - The prescribed `PetscStreamType`
519: - n - The number of children to create
521: Output Parameter:
522: . dsub - The created child context(s)
524: Notes:
525: This routine creates `n` edges of a DAG from a source node which are causally dependent on the
526: source node. This causal dependency is established as-if by calling
527: `PetscDeviceContextWaitForContext()` on every child.
529: `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx`
530: expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed.
532: This routine only accounts for work queued on `dctx` up until calling this routine, any
533: subsequent work enqueued on `dctx` has no effect on `dsub`.
535: The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case
536: that they are different. This is useful in cases where a routine can locally exploit stream
537: parallelism without needing to worry about what stream type the incoming `PetscDeviceContext`
538: carries.
540: DAG representation:
541: .vb
542: time ->
544: -> dctx - |= CALL =| -\----> dctx ------>
545: \---> dsub[0] --->
546: \--> ... ------->
547: \-> dsub[n-1] ->
548: .ve
550: Level: intermediate
552: .N ASYNC_API
554: .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`,
555: `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()`
556: @*/
557: PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub)
558: {
559: // debugging only
560: std::string idList;
561: auto ninput = n;
563: PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
564: PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
566: *dsub = nullptr;
567: /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
568: if (PetscDefined(USE_DEBUG_AND_INFO)) idList.reserve(4 * n);
569: PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr);
570: /* update child totals */
571: dctx->numChildren += n;
572: /* now to find out if we have room */
573: if (dctx->numChildren > dctx->maxNumChildren) {
574: const auto numChildren = dctx->numChildren;
575: auto &maxNumChildren = dctx->maxNumChildren;
576: auto numAllocated = numChildren;
578: /* no room, either from having too many kids or not having any */
579: if (auto &childIDs = dctx->childIDs) {
580: // the difference is backwards because we have not updated maxNumChildren yet
581: numAllocated -= maxNumChildren;
582: /* have existing children, must reallocate them */
583: PetscRealloc(numChildren * sizeof(*childIDs), &childIDs);
584: /* clear the extra memory since realloc doesn't do it for us */
585: PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated);
586: } else {
587: /* have no children */
588: PetscCalloc1(numChildren, &childIDs);
589: }
590: /* update total number of children */
591: maxNumChildren = numChildren;
592: }
593: PetscMalloc1(n, dsub);
594: for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) {
595: auto &childID = dctx->childIDs[i];
596: /* empty child slot */
597: if (!childID) {
598: auto &childctx = (*dsub)[i];
600: /* create the child context in the image of its parent */
601: PetscDeviceContextDuplicate_Private(dctx, stype, &childctx);
602: PetscDeviceContextWaitForContext(childctx, dctx);
603: /* register the child with its parent */
604: PetscObjectGetId(PetscObjectCast(childctx), &childID);
605: if (PetscDefined(USE_DEBUG_AND_INFO)) {
606: idList += std::to_string(childID);
607: if (ninput != 1) idList += ", ";
608: }
609: --ninput;
610: }
611: }
612: PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr);
613: PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str());
614: return 0;
615: }
617: /*@C
618: PetscDeviceContextFork - Create a set of dependent child contexts from a parent context
620: Not Collective, Asynchronous
622: Input Parameters:
623: + dctx - The parent `PetscDeviceContext`
624: - n - The number of children to create
626: Output Parameter:
627: . dsub - The created child context(s)
629: Notes:
630: Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed
631: `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\:
633: .vb
634: PetscStreamType stype;
636: PetscDeviceContextGetStreamType(dctx, &stype);
637: PetscDeviceContextForkWithStreamType(dctx, stype, ...);
638: .ve
640: Level: beginner
642: .N ASYNC_API
644: .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`,
645: `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
646: @*/
647: PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
648: {
649: auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
651: PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
652: PetscDeviceContextGetStreamType(dctx, &stype);
653: PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub);
654: return 0;
655: }
657: /*@C
658: PetscDeviceContextJoin - Converge a set of child contexts
660: Not Collective, Asynchronous
662: Input Parameters:
663: + dctx - A `PetscDeviceContext` to converge on
664: . n - The number of sub contexts to converge
665: . joinMode - The type of join to perform
666: - dsub - The sub contexts to converge
668: Notes:
669: If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source
670: node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`)
671: which receives `n` edges (and optionally destroys them) which is dependent on the completion
672: of all incoming edges.
674: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`, all contexts in `dsub` will be
675: destroyed by this routine. Thus all sub contexts must have been created with the `dctx`
676: passed to this routine.
678: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`, all sub contexts will additionally wait on
679: `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the
680: sync suffix does NOT refer to the host, i.e. this routine does NOT call
681: `PetscDeviceSynchronize()`.
683: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`, `dctx` waits for all sub contexts but
684: the sub contexts do not wait for one another or `dctx` afterwards.
686: DAG representations:
687: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
688: .vb
689: time ->
691: -> dctx ---------/- |= CALL =| - dctx ->
692: -> dsub[0] -----/
693: -> ... -------/
694: -> dsub[n-1] -/
695: .ve
696: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
697: .vb
698: time ->
700: -> dctx ---------/- |= CALL =| -\----> dctx ------>
701: -> dsub[0] -----/ \---> dsub[0] --->
702: -> ... -------/ \--> ... ------->
703: -> dsub[n-1] -/ \-> dsub[n-1] ->
704: .ve
705: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
706: .vb
707: time ->
709: -> dctx ----------/- |= CALL =| - dctx ->
710: -> dsub[0] ------/----------------------->
711: -> ... --------/------------------------>
712: -> dsub[n-1] --/------------------------->
713: .ve
715: Level: beginner
717: .N ASYNC_API
719: .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`,
720: `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
721: @*/
722: PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
723: {
724: // debugging only
725: std::string idList;
727: PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
728: /* validity of dctx is checked in the wait-for loop */
730: PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
731: /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
732: if (PetscDefined(USE_DEBUG_AND_INFO)) idList.reserve(4 * n);
733: /* first dctx waits on all the incoming edges */
734: PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr);
735: for (PetscInt i = 0; i < n; ++i) {
737: PetscDeviceContextWaitForContext(dctx, (*dsub)[i]);
738: if (PetscDefined(USE_DEBUG_AND_INFO)) {
739: idList += std::to_string(PetscObjectCast((*dsub)[i])->id);
740: if (i + 1 < n) idList += ", ";
741: }
742: }
744: /* now we handle the aftermath */
745: switch (joinMode) {
746: case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
747: const auto children = dctx->childIDs;
748: const auto maxchild = dctx->maxNumChildren;
749: auto &nchild = dctx->numChildren;
750: PetscInt j = 0;
753: /* update child count while it's still fresh in memory */
754: nchild -= n;
755: for (PetscInt i = 0; i < maxchild; ++i) {
756: if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) {
757: /* child is one of ours, can destroy it */
758: PetscDeviceContextDestroy((*dsub) + j);
759: /* reset the child slot */
760: children[i] = 0;
761: if (++j == n) break;
762: }
763: }
764: /* gone through the loop but did not find every child */
766: PetscFree(*dsub);
767: } break;
768: case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
769: for (PetscInt i = 0; i < n; ++i) PetscDeviceContextWaitForContext((*dsub)[i], dctx);
770: case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
771: break;
772: default:
773: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
774: }
775: PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr);
777: PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str());
778: return 0;
779: }
781: /*@C
782: PetscDeviceContextSynchronize - Block the host until all work queued on a
783: `PetscDeviceContext` has finished
785: Not Collective
787: Input Parameters:
788: . dctx - The `PetscDeviceContext` to synchronize
790: Notes:
791: The host will not return from this routine until `dctx` is idle. Any and all memory
792: operations queued on or otherwise associated with (either explicitly or implicitly via
793: dependencies) are guaranteed to have finished and be globally visible on return.
795: In effect, this routine serves as memory and execution barrier.
797: DAG representation:
798: .vb
799: time ->
801: -> dctx - |= CALL =| - dctx ->
802: .ve
804: Level: beginner
806: .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
807: @*/
808: PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
809: {
810: PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
811: PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr);
812: /* if it isn't setup there is nothing to sync on */
813: if (dctx->setup) {
814: (*dctx->ops->synchronize)(dctx);
815: PetscDeviceContextSyncClearMap_Internal(dctx);
816: }
817: PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr);
818: return 0;
819: }
821: /* every device type has a vector of null PetscDeviceContexts -- one for each device */
822: static auto nullContexts = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{};
823: static auto nullContextsFinalizer = false;
825: static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx)
826: {
827: PetscInt devid;
828: PetscDeviceType dtype;
832: if (PetscUnlikely(!nullContextsFinalizer)) {
833: const auto finalizer = [] {
834: for (auto &&dvec : nullContexts) {
835: for (auto &&dctx : dvec) PetscDeviceContextDestroy(&dctx);
836: dvec.clear();
837: }
838: nullContextsFinalizer = false;
839: return 0;
840: };
842: nullContextsFinalizer = true;
843: PetscRegisterFinalize(std::move(finalizer));
844: }
845: PetscDeviceGetDeviceId(device, &devid);
846: PetscDeviceGetType(device, &dtype);
847: {
848: auto &ctxlist = nullContexts[dtype];
851: // need to resize the container if not big enough because incrementing the iterator in
852: // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the
853: // current size of the container.
854: if (static_cast<std::size_t>(devid) >= ctxlist.size()) ctxlist.resize(devid + 1);
855: if (PetscUnlikely(!ctxlist[devid])) {
856: // we have not seen this device before
857: PetscDeviceContextCreate(dctx);
858: PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid);
859: {
860: const auto pobj = PetscObjectCast(*dctx);
861: const auto name = "null context " + std::to_string(devid);
862: const auto prefix = "null_context_" + std::to_string(devid) + '_';
864: PetscObjectSetName(pobj, name.c_str());
865: PetscObjectSetOptionsPrefix(pobj, prefix.c_str());
866: }
867: PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING);
868: PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device);
869: PetscDeviceContextSetUp(*dctx);
870: // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert!
871: ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx);
872: } else *dctx = ctxlist[devid];
873: }
874: return 0;
875: }
877: /*
878: Returns in dctx the "NULL" context for the current PetscDeviceType and PetscDevice, i.e. for the
879: device attached to the current PetscDeviceContext. NULL contexts are guaranteed to always be globally blocking.
880: */
881: PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx)
882: {
883: PetscDeviceContext current;
884: PetscDevice device = nullptr;
887: PetscDeviceContextGetCurrentContext(&current); // the context that is active right now
888: PetscDeviceContextGetDevice(current, &device); // ...and the device attached to it
889: PetscDeviceContextGetNullContextForDevice_Private(current->usersetdevice, device, dctx); // hand back that device's null context (created on first use)
890: return 0;
891: }
893: /*@C
894: PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database
896: Collective on `comm` or `dctx`
898: Input Parameters:
899: + comm - MPI communicator on which to query the options database (optional)
900: - dctx - The `PetscDeviceContext` to configure
902: Output Parameter:
903: . dctx - The `PetscDeviceContext`
905: Options Database Keys:
906: + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` -
907: `PetscDeviceContextSetStreamType()`
908: - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`
910: Notes:
911: The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is
912: used (which is always `PETSC_COMM_SELF`). `PetscDeviceContextSetUp()` is called on `dctx` before returning.
914: Level: beginner
916: .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`,
917: `PetscDeviceContextView()`
918: @*/
919: PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx)
920: {
921: const auto pobj = PetscObjectCast(dctx);
922: auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE); // (device type, flag); the flag is tested below before the type is applied
923: auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE); // (stream type, flag); same convention as dtype
924: MPI_Comm old_comm = PETSC_COMM_SELF; // value written back to pobj->comm once the options have been queried
926: // do not use PetscDeviceContextGetOptionalNullContext_Internal() here, the user is not allowed to set it from options!
928: /* set the device type first */
929: if (const auto device = dctx->device) PetscDeviceGetType(device, &dtype.first); // seed with the type of the already attached device, if any
930: PetscDeviceContextGetStreamType(dctx, &stype.first); // seed with the current stream type
932: if (comm == MPI_COMM_NULL) {
933: PetscObjectGetComm(pobj, &comm); // fall back to the communicator of dctx itself
934: } else {
935: // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so
936: // PetscObjectOptionsBegin() behaves as if dctx had comm
937: old_comm = Petsc::util::exchange(pobj->comm, comm);
938: }
940: PetscObjectOptionsBegin(pobj); // PetscOptionsObject used on the next line is not declared in this function; presumably introduced by this macro -- TODO confirm
941: PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype); // presumably updates .first and sets .second when the matching option was given -- verify against its definition
942: PetscOptionsEnd();
943: // reset the comm (should be PETSC_COMM_SELF)
944: if (comm != MPI_COMM_NULL) pobj->comm = old_comm; // NOTE(review): comm was overwritten by PetscObjectGetComm() on the MPI_COMM_NULL path, so this guard looks like it holds on both paths -- confirm it is intentional
945: if (dtype.second) PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first); // only touch the device when the flag is set
946: if (stype.second) PetscDeviceContextSetStreamType(dctx, stype.first); // likewise for the stream type
947: PetscDeviceContextSetUp(dctx);
948: return 0;
949: }
951: /*@C
952: PetscDeviceContextView - View a `PetscDeviceContext`
954: Collective on `viewer`
956: Input Parameters:
957: + dctx - The `PetscDeviceContext`
958: - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`)
960: Notes:
961: If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this
962: routine is collective on `PETSC_COMM_WORLD`.
963: For ASCII viewers the stream type, the number of children and the child IDs are printed through a sub-viewer on `PETSC_COMM_SELF`; the attached `PetscDevice` (if any) is then viewed with `viewer`.
964: Level: beginner
966: .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()`
967: @*/
968: PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer)
969: {
970: PetscBool iascii;
972: PetscDeviceContextGetOptionalNullContext_Internal(&dctx); // presumably substitutes the null context when dctx is NULL -- verify against its definition
973: if (!viewer) PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer);
975: PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii);
976: if (iascii) {
977: auto stype = PETSC_STREAM_DEFAULT_BLOCKING;
978: PetscViewer sub;
980: PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub);
981: PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub);
982: PetscViewerASCIIPushTab(sub); // indent everything that describes dctx itself
983: PetscDeviceContextGetStreamType(dctx, &stype);
984: PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype]);
985: PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren);
986: if (const auto nchild = dctx->numChildren) {
987: PetscViewerASCIIPushTab(sub); // one more level for the list of child IDs
988: for (PetscInt i = 0; i < nchild; ++i) {
989: if (i == nchild - 1) {
990: PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i]); // last ID: no trailing ", " separator
991: } else {
992: PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i]);
993: }
994: }
995: PetscViewerASCIIPopTab(sub); // undo the child-list level pushed above so pushes and pops on sub stay balanced
996: }
997: PetscViewerASCIIPopTab(sub); // undo the level pushed right after the class name line
998: PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub);
999: PetscViewerFlush(viewer);
1000: PetscViewerASCIIPushTab(viewer); // the device is viewed one level deeper; popped again below
1001: }
1002: if (const auto device = dctx->device) PetscDeviceView(device, viewer);
1003: if (iascii) PetscViewerASCIIPopTab(viewer);
1004: return 0;
1005: }
1006: /*@C
1007: PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options
1009: Input Parameters:
1010: + dctx - The `PetscDeviceContext` to view
1011: . obj - Optional `PetscObject` to associate (may be `NULL`), passed through to `PetscObjectViewFromOptions()`
1012: - name - The command line option, passed through to `PetscObjectViewFromOptions()`
1014: Level: beginner
1016: .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()`
1017: @*/
1018: PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[])
1019: {
1020: PetscDeviceContextGetOptionalNullContext_Internal(&dctx); // presumably substitutes the null context when dctx is NULL -- verify against its definition
1023: PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name); // all the work is delegated to the generic PetscObject routine
1024: return 0;
1025: }