Actual source code: dcontext.cxx

  1: #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
  2: #include <petsc/private/viewerimpl.h>

  4: #include <petsc/private/cpp/object_pool.hpp>
  5: #include <petsc/private/cpp/utility.hpp>
  6: #include <petsc/private/cpp/array.hpp>

  8: #include <vector>
  9: #include <string> // std::to_string among other things

 11: /* Define the allocator */
 12: class PetscDeviceContextAllocator : public Petsc::AllocatorBase<PetscDeviceContext> {
 13: public:
 14:   PETSC_CXX_COMPAT_DECL(PetscErrorCode create(PetscDeviceContext *dctx))
 15:   {
 16:     PetscHeaderCreate(*dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, PetscDeviceContextDestroy, PetscDeviceContextView);
 17:     PetscObjectCast(*dctx)->cpp = new CxxData();
 18:     reset(*dctx, false);
 19:     return 0;
 20:   }

 22:   PETSC_CXX_COMPAT_DECL(PetscErrorCode destroy(PetscDeviceContext dctx))
 23:   {
 24:     PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren);
 25:     PetscTryTypeMethod(dctx, destroy);
 26:     PetscDeviceDestroy(&dctx->device);
 27:     PetscFree(dctx->childIDs);
 28:     delete CxxDataCast(dctx);
 29:     PetscHeaderDestroy(&dctx);
 30:     return 0;
 31:   }

 33:   PETSC_CXX_COMPAT_DECL(PetscErrorCode reset(PetscDeviceContext dctx, bool zero = true))
 34:   {
 35:     if (zero) {
 36:       // reset the device if the user set it
 37:       if (auto &userset = dctx->usersetdevice) {
 38:         userset = PETSC_FALSE;
 39:         PetscTryTypeMethod(dctx, destroy);
 40:         PetscDeviceDestroy(&dctx->device);
 41:         PetscArrayzero(dctx->ops, 1);
 42:         dctx->data = nullptr;
 43:       }
 44:       PetscHeaderReset_Internal(PetscObjectCast(dctx));
 45:       dctx->numChildren = 0;
 46:       dctx->setup       = PETSC_FALSE;
 47:       // don't deallocate the child array, rather just zero it out
 48:       PetscArrayzero(dctx->childIDs, dctx->maxNumChildren);
 49:       CxxDataCast(dctx)->clear();
 50:     }
 51:     dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING;
 52:     return 0;
 53:   }
 54: };

 56: static Petsc::ObjectPool<PetscDeviceContext, PetscDeviceContextAllocator> contextPool;

 58: /*@C
 59:   PetscDeviceContextCreate - Creates a `PetscDeviceContext`

 61:   Not Collective

 63:   Output Parameter:
 64: . dctx - The `PetscDeviceContext`

 66:   Note:
 67:   Unlike almost every other PETSc class it is advised that most users use
 68:   `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of
 69:   different types are incompatible with one another; using `PetscDeviceContextDuplicate()`
 70:   ensures compatible types.

 72:   DAG representation:
 73: .vb
 74:   time ->

 76:   |= CALL =| - dctx ->
 77: .ve

 79:   Level: beginner

 81: .N ASYNC_API

 83: .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`,
 84: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`,
 85: `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()`
 86: @*/
 87: PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx)
 88: {
 90:   PetscDeviceInitializePackage();
 91:   PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr);
 92:   contextPool.allocate(dctx);
 93:   PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr);
 94:   return 0;
 95: }

 97: /*@C
 98:   PetscDeviceContextDestroy - Frees a `PetscDeviceContext`

100:   Not Collective

102:   Input Parameters:
103: . dctx - The `PetscDeviceContext`

105:   Notes:
106:   No implicit synchronization occurs due to this routine, all resources are released completely
107:   asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on
108:   `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before
109:   calling this routine.

111:   DAG representation:
112: .vb
113:   time ->

115:   -> dctx - |= CALL =|
116: .ve

118:   Developer Notes:
119:   `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever
120:   growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool,
121:   this should perhaps be implemented.

123:   Level: beginner

125: .N ASYNC_API

127: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
128: `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()`
129: @*/
130: PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
131: {
133:   if (!*dctx) return 0;
134:   PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr);
135:   if (--(PetscObjectCast(*dctx)->refct) <= 0) {
136:     PetscDeviceContextCheckNotOrphaned_Internal(*dctx);
137:     // std::move of the expression of the trivially-copyable type 'PetscDeviceContext' (aka
138:     // '_n_PetscDeviceContext *') has no effect; remove std::move() [performance-move-const-arg]
139:     // can't remove std::move, since reclaim only takes r-value reference
140:     contextPool.deallocate(std::move(*dctx)); // NOLINT (performance-move-const-arg)
141:   }
142:   PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr);
143:   *dctx = nullptr;
144:   return 0;
145: }

147: /*@C
148:   PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a
149:   `PetscDeviceContext`

151:   Not Collective

153:   Input Parameters:
154: + dctx - The `PetscDeviceContext`
155: - type - The `PetscStreamType`

157:   Notes:
158:   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
159:   types and their interactions. If the `PetscDeviceContext` was previously set up and stream
160:   type was changed, you must call `PetscDeviceContextSetUp()` again after this routine.

162:   Level: beginner

164: .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`,
165: `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`
166: @*/
167: PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type)
168: {
169:   // do not use getoptionalnullcontext here since we do not want the user to change the stream
170:   // type
173:   // only need to do complex swapping if the object has already been setup
174:   if (dctx->setup && (dctx->streamType != type)) {
175:     dctx->setup = PETSC_FALSE;
176:     PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr);
177:     PetscUseTypeMethod(dctx, changestreamtype, type);
178:     PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr);
179:   }
180:   dctx->streamType = type;
181:   return 0;
182: }

184: /*@C
185:   PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a
186:   `PetscDeviceContext`

188:   Not Collective

190:   Input Parameter:
191: . dctx - The `PetscDeviceContext`

193:   Output Parameter:
194: . type - The `PetscStreamType`

196:   Notes:
197:   See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available
198:   types and their interactions

200:   Level: beginner

202: .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`,
203: `PetscDeviceContextSetFromOptions()`
204: @*/
205: PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
206: {
207:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
209:   *type = dctx->streamType;
210:   return 0;
211: }

213: /*
214:   Actual function to set the device.

216:   1. Repeatedly destroying and recreating internal data structures (like streams and events)
217:      for recycled PetscDeviceContexts is not free. If done often, it does add up.
218:   2. The vast majority of PetscDeviceContexts are created by PETSc either as children or
219:      default contexts. The default contexts *never* change type, and the children are extremely
220:      unlikely to (chances are if you fork once, you will fork again very soon).
221:   3. The only time this calculus changes is if the user themselves sets the device type. In
222:      this case we do not know what the user has changed, so must always wipe the slate clean.

224:   Thus we need to keep track of whether the user explicitly sets the device context's device.
225: */
226: static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set)
227: {
228:   // do not use getoptionalnullcontext here since we do not want the user to change its device
231:   if (dctx->device && (dctx->device->id == device->id)) return 0;
232:   PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr);
233:   if (const auto destroy = dctx->ops->destroy) (*destroy)(dctx);
234:   PetscDeviceDestroy(&dctx->device);
235:   PetscMemzero(dctx->ops, sizeof(*dctx->ops));
236:   (*device->ops->createcontext)(dctx);
237:   PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr);
238:   PetscDeviceReference_Internal(device);
239:   dctx->device        = device;
240:   dctx->setup         = PETSC_FALSE;
241:   dctx->usersetdevice = user_set;
242:   return 0;
243: }

245: PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type)
246: {
247:   PetscDevice device;

249:   PetscDeviceGetDefaultForType_Internal(type, &device);
250:   PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE);
251:   return 0;
252: }

254: /*@C
255:   PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext`

257:   Not Collective

259:   Input Parameters:
260: + dctx   - The `PetscDeviceContext`
261: - device - The `PetscDevice`

263:   Notes:
264:   This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must
265:   also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly
266:   necessary to set a context's device to enable usage; any created `PetscDeviceContext`s will
267:   always come equipped with the "default" device.

269:   This routine is a no-op if `device` is already attached to `dctx`.

271:   This routine may (but is very unlikely to) initialize the backend device and may incur
272:   synchronization.

274:   Level: intermediate

276: .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`,
277: `PetscDeviceContextGetDeviceType()`
278: @*/
279: PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
280: {
281:   PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE);
282:   return 0;
283: }

285: /*@C
286:   PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext`

288:   Not Collective

290:   Input Parameter:
291: . dctx - the `PetscDeviceContext`

293:   Output Parameter:
294: . device - The `PetscDevice`

296:   Notes:
297:   This is a borrowed reference, the user should not destroy `device`.

299:   Level: intermediate

301: .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()`
302: @*/
303: PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device)
304: {
305:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
307:   PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id);
308:   *device = dctx->device;
309:   return 0;
310: }

312: /*@C
313:   PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext`

315:   Not Collective

317:   Input Parameter:
318: . dctx - The `PetscDeviceContext`

320:   Output Parameter:
321: . type - The `PetscDeviceType`

323:   Notes:
324:   This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` ->
325:   `PetscDeviceGetType()`.

327:   Level: beginner

329: .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice`
330: @*/
331: PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type)
332: {
333:   PetscDevice device = nullptr;

335:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
337:   PetscDeviceContextGetDevice(dctx, &device);
338:   PetscDeviceGetType(device, type);
339:   return 0;
340: }

342: /*@C
343:   PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use

345:   Not Collective

347:   Input Parameter:
348: . dctx - The `PetscDeviceContext`

350:   Developer Notes:
351:   This routine is usually the stage where a `PetscDeviceContext` acquires device-side data
352:   structures such as streams, events, and (possibly) handles.

354:   Level: beginner

356: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
357: `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()`
358: @*/
359: PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
360: {
361:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
362:   if (dctx->setup) return 0;
363:   if (!dctx->device) {
364:     const auto default_dtype = PETSC_DEVICE_DEFAULT();

366:     PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype]);
367:     PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype);
368:   }
369:   PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr);
370:   PetscUseTypeMethod(dctx, setup);
371:   PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr);
372:   dctx->setup = PETSC_TRUE;
373:   return 0;
374: }

376: static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup)
377: {
378:   PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr);
379:   PetscDeviceContextCreate(dctxdup);
380:   PetscDeviceContextSetStreamType(*dctxdup, stype);
381:   if (const auto device = dctx->device) PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice);
382:   PetscDeviceContextSetUp(*dctxdup);
383:   PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr);
384:   return 0;
385: }

387: /*@C
388:   PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object

390:   Not Collective

392:   Input Parameter:
393: . dctx - The `PetscDeviceContext` to duplicate

395:   Output Parameter:
396: . dctxdup - The duplicated `PetscDeviceContext`

398:   Notes:
399:   This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as
400:   another. Note however that `dctxdup` does not share any of the underlying data with `dctx`,
401:   (including its current stream-state) they are completely separate objects.

403:   There is no implied ordering between `dctx` or `dctxdup`.

405:   DAG representation:
406: .vb
407:   time ->

409:   -> dctx - |= CALL =| - dctx ---->
410:                        - dctxdup ->
411: .ve

413:   Level: beginner

415: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`,
416: `PetscDeviceContextSetStreamType()`
417: @*/
418: PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
419: {
420:   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;

422:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
424:   PetscDeviceContextGetStreamType(dctx, &stype);
425:   PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup);
426:   return 0;
427: }

429: /*@C
430:   PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle

432:   Not Collective

434:   Input Parameter:
435: . dctx - The `PetscDeviceContext`

437:   Output Parameter:
438: . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work

440:   Note:
441:   This routine only refers to a single context and does NOT take any of its children into
442:   account. That is, if `dctx` is idle but has dependents who do have work this routine still
443:   returns `PETSC_TRUE`.

445:   Level: intermediate

447: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()`
448: @*/
449: PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle)
450: {
451:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
453:   PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr);
454:   PetscUseTypeMethod(dctx, query, idle);
455:   PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr);
456:   PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not");
457:   return 0;
458: }

460: /*@C
461:   PetscDeviceContextWaitForContext - Make one context wait for another context to finish

463:   Not Collective

465:   Input Parameters:
466: + dctxa - The `PetscDeviceContext` object that is waiting
467: - dctxb - The `PetscDeviceContext` object that is being waited on

469:   Notes:
470:   Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host
471:   does not wait for the serialization to actually occur.

473:   This routine uses only the state of `dctxb` at the moment this routine was called, so any
474:   future work queued will not affect `dctxa`. It is safe to pass the same context to both
475:   arguments (in which case this routine does nothing).

477:   DAG representation:
478: .vb
479:   time ->

481:   -> dctxa ---/- |= CALL =| - dctxa ->
482:              /
483:   -> dctxb -/------------------------>
484: .ve

486:   Level: beginner

488: .N ASYNC_API

490: .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()`
491: @*/
492: PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb)
493: {
494:   PetscObject aobj;

496:   PetscDeviceContextGetOptionalNullContext_Internal(&dctxa);
497:   PetscDeviceContextGetOptionalNullContext_Internal(&dctxb);
499:   if (dctxa == dctxb) return 0;
500:   aobj = PetscObjectCast(dctxa);
501:   PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr);
502:   PetscUseTypeMethod(dctxa, waitforcontext, dctxb);
503:   CxxDataCast(dctxa)->upstream[dctxb] = CxxDataParent(dctxb);
504:   PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr);
505:   PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", aobj->id, PetscObjectCast(dctxb)->id);
506:   PetscObjectStateIncrease(aobj);
507:   return 0;
508: }

510: /*@C
511:   PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent
512:   context with a prescribed `PetscStreamType`

514:   Not Collective, Asynchronous

516:   Input Parameters:
517: + dctx  - The parent `PetscDeviceContext`
518: . stype - The prescribed `PetscStreamType`
519: - n     - The number of children to create

521:   Output Parameter:
522: . dsub - The created child context(s)

524:   Notes:
525:   This routine creates `n` edges of a DAG from a source node which are causally dependent on the
526:   source node. This causal dependency is established as-if by calling
527:   `PetscDeviceContextWaitForContext()` on every child.

529:   `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx`
530:   expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed.

532:   This routine only accounts for work queued on `dctx` up until calling this routine, any
533:   subsequent work enqueued on `dctx` has no effect on `dsub`.

535:   The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case
536:   that they are different. This is useful in cases where a routine can locally exploit stream
537:   parallelism without needing to worry about what stream type the incoming `PetscDeviceContext`
538:   carries.

540:   DAG representation:
541: .vb
542:   time ->

544:   -> dctx - |= CALL =| -\----> dctx ------>
545:                          \---> dsub[0] --->
546:                           \--> ... ------->
547:                            \-> dsub[n-1] ->
548: .ve

550:   Level: intermediate

552: .N ASYNC_API

554: .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`,
555: `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()`
556: @*/
557: PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub)
558: {
559:   // debugging only
560:   std::string idList;
561:   auto        ninput = n;

563:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
564:   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n);
566:   *dsub = nullptr;
567:   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
568:   if (PetscDefined(USE_DEBUG_AND_INFO)) idList.reserve(4 * n);
569:   PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr);
570:   /* update child totals */
571:   dctx->numChildren += n;
572:   /* now to find out if we have room */
573:   if (dctx->numChildren > dctx->maxNumChildren) {
574:     const auto numChildren    = dctx->numChildren;
575:     auto      &maxNumChildren = dctx->maxNumChildren;
576:     auto       numAllocated   = numChildren;

578:     /* no room, either from having too many kids or not having any */
579:     if (auto &childIDs = dctx->childIDs) {
580:       // the difference is backwards because we have not updated maxNumChildren yet
581:       numAllocated -= maxNumChildren;
582:       /* have existing children, must reallocate them */
583:       PetscRealloc(numChildren * sizeof(*childIDs), &childIDs);
584:       /* clear the extra memory since realloc doesn't do it for us */
585:       PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated);
586:     } else {
587:       /* have no children */
588:       PetscCalloc1(numChildren, &childIDs);
589:     }
590:     /* update total number of children */
591:     maxNumChildren = numChildren;
592:   }
593:   PetscMalloc1(n, dsub);
594:   for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) {
595:     auto &childID = dctx->childIDs[i];
596:     /* empty child slot */
597:     if (!childID) {
598:       auto &childctx = (*dsub)[i];

600:       /* create the child context in the image of its parent */
601:       PetscDeviceContextDuplicate_Private(dctx, stype, &childctx);
602:       PetscDeviceContextWaitForContext(childctx, dctx);
603:       /* register the child with its parent */
604:       PetscObjectGetId(PetscObjectCast(childctx), &childID);
605:       if (PetscDefined(USE_DEBUG_AND_INFO)) {
606:         idList += std::to_string(childID);
607:         if (ninput != 1) idList += ", ";
608:       }
609:       --ninput;
610:     }
611:   }
612:   PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr);
613:   PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str());
614:   return 0;
615: }

617: /*@C
618:   PetscDeviceContextFork - Create a set of dependent child contexts from a parent context

620:   Not Collective, Asynchronous

622:   Input Parameters:
623: + dctx - The parent `PetscDeviceContext`
624: - n    - The number of children to create

626:   Output Parameter:
627: . dsub - The created child context(s)

629:   Notes:
630:   Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed
631:   `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\:

633: .vb
634:   PetscStreamType stype;

636:   PetscDeviceContextGetStreamType(dctx, &stype);
637:   PetscDeviceContextForkWithStreamType(dctx, stype, ...);
638: .ve

640:   Level: beginner

642: .N ASYNC_API

644: .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`,
645: `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`
646: @*/
647: PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub)
648: {
649:   auto stype = PETSC_STREAM_DEFAULT_BLOCKING;

651:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
652:   PetscDeviceContextGetStreamType(dctx, &stype);
653:   PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub);
654:   return 0;
655: }

657: /*@C
658:   PetscDeviceContextJoin - Converge a set of child contexts

660:   Not Collective, Asynchronous

662:   Input Parameters:
663: + dctx         - A `PetscDeviceContext` to converge on
664: . n            - The number of sub contexts to converge
665: . joinMode     - The type of join to perform
666: - dsub         - The sub contexts to converge

668:   Notes:
669:   If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source
670:   node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`)
671:   which receives `n` edges (and optionally destroys them) which is dependent on the completion
672:   of all incoming edges.

674:   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`, all contexts in `dsub` will be
675:   destroyed by this routine. Thus all sub contexts must have been created with the `dctx`
676:   passed to this routine.

678:   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`, all sub contexts will additionally wait on
679:   `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the
680:   sync suffix does NOT refer to the host, i.e. this routine does NOT call
681:   `PetscDeviceSynchronize()`.

683:   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`, `dctx` waits for all sub contexts but
684:   the sub contexts do not wait for one another or `dctx` afterwards.

686:   DAG representations:
687:   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`
688: .vb
689:   time ->

691:   -> dctx ---------/- |= CALL =| - dctx ->
692:   -> dsub[0] -----/
693:   ->  ... -------/
694:   -> dsub[n-1] -/
695: .ve
696:   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`
697: .vb
698:   time ->

700:   -> dctx ---------/- |= CALL =| -\----> dctx ------>
701:   -> dsub[0] -----/                \---> dsub[0] --->
702:   ->  ... -------/                  \--> ... ------->
703:   -> dsub[n-1] -/                    \-> dsub[n-1] ->
704: .ve
705:   If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`
706: .vb
707:   time ->

709:   -> dctx ----------/- |= CALL =| - dctx ->
710:   -> dsub[0] ------/----------------------->
711:   ->  ... --------/------------------------>
712:   -> dsub[n-1] --/------------------------->
713: .ve

715:   Level: beginner

717: .N ASYNC_API

719: .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`,
720: `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode`
721: @*/
722: PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub)
723: {
724:   // debugging only
725:   std::string idList;

727:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
728:   /* validity of dctx is checked in the wait-for loop */
730:   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n);
731:   /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */
732:   if (PetscDefined(USE_DEBUG_AND_INFO)) idList.reserve(4 * n);
733:   /* first dctx waits on all the incoming edges */
734:   PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr);
735:   for (PetscInt i = 0; i < n; ++i) {
737:     PetscDeviceContextWaitForContext(dctx, (*dsub)[i]);
738:     if (PetscDefined(USE_DEBUG_AND_INFO)) {
739:       idList += std::to_string(PetscObjectCast((*dsub)[i])->id);
740:       if (i + 1 < n) idList += ", ";
741:     }
742:   }

744:   /* now we handle the aftermath */
745:   switch (joinMode) {
746:   case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: {
747:     const auto children = dctx->childIDs;
748:     const auto maxchild = dctx->maxNumChildren;
749:     auto      &nchild   = dctx->numChildren;
750:     PetscInt   j        = 0;

753:     /* update child count while it's still fresh in memory */
754:     nchild -= n;
755:     for (PetscInt i = 0; i < maxchild; ++i) {
756:       if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) {
757:         /* child is one of ours, can destroy it */
758:         PetscDeviceContextDestroy((*dsub) + j);
759:         /* reset the child slot */
760:         children[i] = 0;
761:         if (++j == n) break;
762:       }
763:     }
764:     /* gone through the loop but did not find every child */
766:     PetscFree(*dsub);
767:   } break;
768:   case PETSC_DEVICE_CONTEXT_JOIN_SYNC:
769:     for (PetscInt i = 0; i < n; ++i) PetscDeviceContextWaitForContext((*dsub)[i], dctx);
770:   case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC:
771:     break;
772:   default:
773:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given");
774:   }
775:   PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr);

777:   PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str());
778:   return 0;
779: }

781: /*@C
782:   PetscDeviceContextSynchronize - Block the host until all work queued on a
783:   `PetscDeviceContext` has finished

785:   Not Collective

787:   Input Parameters:
788: . dctx - The `PetscDeviceContext` to synchronize

790:   Notes:
791:   The host will not return from this routine until `dctx` is idle. Any and all memory
792:   operations queued on or otherwise associated with (either explicitly or implicitly via
793:   dependencies) are guaranteed to have finished and be globally visible on return.

795:   In effect, this routine serves as memory and execution barrier.

797:   DAG representation:
798: .vb
799:   time ->

801:   -> dctx - |= CALL =| - dctx ->
802: .ve

804:   Level: beginner

806: .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()`
807: @*/
808: PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx)
809: {
810:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
811:   PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr);
812:   /* if it isn't setup there is nothing to sync on */
813:   if (dctx->setup) {
814:     (*dctx->ops->synchronize)(dctx);
815:     PetscDeviceContextSyncClearMap_Internal(dctx);
816:   }
817:   PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr);
818:   return 0;
819: }

821: /* every device type has a vector of null PetscDeviceContexts -- one for each device */
822: static auto nullContexts          = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{};
823: static auto nullContextsFinalizer = false;

825: static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx)
826: {
827:   PetscInt        devid;
828:   PetscDeviceType dtype;

832:   if (PetscUnlikely(!nullContextsFinalizer)) {
833:     const auto finalizer = [] {
834:       for (auto &&dvec : nullContexts) {
835:         for (auto &&dctx : dvec) PetscDeviceContextDestroy(&dctx);
836:         dvec.clear();
837:       }
838:       nullContextsFinalizer = false;
839:       return 0;
840:     };

842:     nullContextsFinalizer = true;
843:     PetscRegisterFinalize(std::move(finalizer));
844:   }
845:   PetscDeviceGetDeviceId(device, &devid);
846:   PetscDeviceGetType(device, &dtype);
847:   {
848:     auto &ctxlist = nullContexts[dtype];

851:     // need to resize the container if not big enough because incrementing the iterator in
852:     // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the
853:     // current size of the container.
854:     if (static_cast<std::size_t>(devid) >= ctxlist.size()) ctxlist.resize(devid + 1);
855:     if (PetscUnlikely(!ctxlist[devid])) {
856:       // we have not seen this device before
857:       PetscDeviceContextCreate(dctx);
858:       PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid);
859:       {
860:         const auto pobj   = PetscObjectCast(*dctx);
861:         const auto name   = "null context " + std::to_string(devid);
862:         const auto prefix = "null_context_" + std::to_string(devid) + '_';

864:         PetscObjectSetName(pobj, name.c_str());
865:         PetscObjectSetOptionsPrefix(pobj, prefix.c_str());
866:       }
867:       PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING);
868:       PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device);
869:       PetscDeviceContextSetUp(*dctx);
870:       // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert!
871:       ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx);
872:     } else *dctx = ctxlist[devid];
873:   }
874:   return 0;
875: }

877: /*
878:   Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are
879:   guaranteed to always be globally blocking.
880: */
881: PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx)
882: {
883:   PetscDeviceContext gctx;
884:   PetscDevice        gdev = nullptr;

887:   PetscDeviceContextGetCurrentContext(&gctx);
888:   PetscDeviceContextGetDevice(gctx, &gdev);
889:   PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx);
890:   return 0;
891: }

893: /*@C
894:   PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database

896:   Collective on `comm` or `dctx`

898:   Input Parameters:
899: + comm - MPI communicator on which to query the options database (optional)
900: - dctx - The `PetscDeviceContext` to configure

902:   Output Parameter:
903: . dctx - The `PetscDeviceContext`

905:   Options Database Keys:
906: + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` -
907:    `PetscDeviceContextSetStreamType()`
908: - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType`

910:   Notes:
911:   The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is
912:   used (which is always `PETSC_COMM_SELF`).

914:   Level: beginner

916: .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`,
917: `PetscDeviceContextView()`
918: @*/
919: PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx)
920: {
921:   const auto pobj     = PetscObjectCast(dctx);
922:   auto       dtype    = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
923:   auto       stype    = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
924:   MPI_Comm   old_comm = PETSC_COMM_SELF;

926:   // do not user getoptionalnullcontext here, the user is not allowed to set it from options!
928:   /* set the device type first */
929:   if (const auto device = dctx->device) PetscDeviceGetType(device, &dtype.first);
930:   PetscDeviceContextGetStreamType(dctx, &stype.first);

932:   if (comm == MPI_COMM_NULL) {
933:     PetscObjectGetComm(pobj, &comm);
934:   } else {
935:     // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so
936:     // PetscObjectOptionsBegin() behaves as if dctx had comm
937:     old_comm = Petsc::util::exchange(pobj->comm, comm);
938:   }

940:   PetscObjectOptionsBegin(pobj);
941:   PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype);
942:   PetscOptionsEnd();
943:   // reset the comm (should be PETSC_COMM_SELF)
944:   if (comm != MPI_COMM_NULL) pobj->comm = old_comm;
945:   if (dtype.second) PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first);
946:   if (stype.second) PetscDeviceContextSetStreamType(dctx, stype.first);
947:   PetscDeviceContextSetUp(dctx);
948:   return 0;
949: }

951: /*@C
952:   PetscDeviceContextView - View a `PetscDeviceContext`

954:   Collective on `viewer`

956:   Input Parameters:
957: + dctx - The `PetscDeviceContext`
958: - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`)

960:   Notes:
961:   If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this
962:   routine is collective on `PETSC_COMM_WORLD`.

964:   Level: beginner

966: .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()`
967: @*/
968: PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer)
969: {
970:   PetscBool iascii;

972:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
973:   if (!viewer) PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer);
975:   PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii);
976:   if (iascii) {
977:     auto        stype = PETSC_STREAM_DEFAULT_BLOCKING;
978:     PetscViewer sub;

980:     PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub);
981:     PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub);
982:     PetscViewerASCIIPushTab(sub);
983:     PetscDeviceContextGetStreamType(dctx, &stype);
984:     PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype]);
985:     PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren);
986:     if (const auto nchild = dctx->numChildren) {
987:       PetscViewerASCIIPushTab(sub);
988:       for (PetscInt i = 0; i < nchild; ++i) {
989:         if (i == nchild - 1) {
990:           PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i]);
991:         } else {
992:           PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i]);
993:         }
994:       }
995:     }
996:     PetscViewerASCIIPopTab(sub);
997:     PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub);
998:     PetscViewerFlush(viewer);
999:     PetscViewerASCIIPushTab(viewer);
1000:   }
1001:   if (const auto device = dctx->device) PetscDeviceView(device, viewer);
1002:   if (iascii) PetscViewerASCIIPopTab(viewer);
1003:   return 0;
1004: }

1006: /*@C
1007:   PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options

1009:   Input Parameters:
1010: + dctx - The `PetscDeviceContext` to view
1011: . obj  - Optional `PetscObject` to associate (may be `NULL`)
1012: - name - The command line option

1014:   Level: beginner

1016: .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()`
1017: @*/
1018: PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[])
1019: {
1020:   PetscDeviceContextGetOptionalNullContext_Internal(&dctx);
1023:   PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name);
1024:   return 0;
1025: }