Actual source code: cupmevent.hpp

  1: #ifndef PETSC_CUPMEVENT_HPP
  2: #define PETSC_CUPMEVENT_HPP

  4: #include <petsc/private/cupminterface.hpp>
  5: #include <petsc/private/cpp/memory.hpp>
  6: #include <petsc/private/cpp/object_pool.hpp>

  8: #if defined(__cplusplus)
  9: namespace Petsc
 10: {

 12: namespace device
 13: {

 15: namespace cupm
 16: {

 18: namespace
 19: {

 21: // A pool for allocating cupmEvent_t's. While events are generally very cheap to create and
 22: // destroy, they are not free. Using the pool vs on-demand creation and destruction yields a ~20%
 23: // speedup.
 24: template <DeviceType T, unsigned long flags>
 25: struct CUPMEventPoolAllocator : impl::Interface<T>, AllocatorBase<typename impl::Interface<T>::cupmEvent_t> {
 26:   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(interface_type, T);

 28:   PETSC_NODISCARD static PetscErrorCode create(cupmEvent_t *) noexcept;
 29:   PETSC_NODISCARD static PetscErrorCode destroy(cupmEvent_t) noexcept;
 30: };

 32: template <DeviceType T, unsigned long flags>
 33: inline PetscErrorCode CUPMEventPoolAllocator<T, flags>::create(cupmEvent_t *event) noexcept
 34: {
 35:   cupmEventCreateWithFlags(event, flags);
 36:   return 0;
 37: }

 39: template <DeviceType T, unsigned long flags>
 40: inline PetscErrorCode CUPMEventPoolAllocator<T, flags>::destroy(cupmEvent_t event) noexcept
 41: {
 42:   cupmEventDestroy(event);
 43:   return 0;
 44: }

 46: } // anonymous namespace

 48: template <DeviceType T, unsigned long flags, typename allocator_type = CUPMEventPoolAllocator<T, flags>, typename pool_type = ObjectPool<typename allocator_type::value_type, allocator_type>>
 49: pool_type &cupm_event_pool() noexcept
 50: {
 51:   static pool_type pool;
 52:   return pool;
 53: }

 55: // pool of events with timing disabled
 56: template <DeviceType T>
 57: inline auto cupm_fast_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>()) &
 58: {
 59:   return cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>();
 60: }

 62: // pool of events with timing enabled
 63: template <DeviceType T>
 64: inline auto cupm_timer_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>()) &
 65: {
 66:   return cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>();
 67: }

 69: // A simple wrapper of cupmEvent_t. This is used in conjunction with CUPMStream to build the
 70: // event-stream pairing for the async allocator. It is also used as the data member of
 71: // PetscEvent.
 72: template <DeviceType T>
 73: class CUPMEvent : impl::Interface<T>, public memory::PoolAllocated<CUPMEvent<T>> {
 74:   using pool_type = memory::PoolAllocated<CUPMEvent<T>>;

 76: public:
 77:   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(interface_type, T);

 79:   constexpr CUPMEvent() noexcept = default;
 80:   ~CUPMEvent() noexcept;

 82:   CUPMEvent(CUPMEvent &&) noexcept;
 83:   CUPMEvent &operator=(CUPMEvent &&) noexcept;

 85:   // event is not copyable
 86:   CUPMEvent(const CUPMEvent &)            = delete;
 87:   CUPMEvent &operator=(const CUPMEvent &) = delete;

 89:   PETSC_NODISCARD cupmEvent_t    get() noexcept;
 90:   PETSC_NODISCARD PetscErrorCode record(cupmStream_t) noexcept;

 92:   explicit operator bool() const noexcept;

 94: private:
 95:   cupmEvent_t event_{};
 96: };

 98: template <DeviceType T>
 99: inline CUPMEvent<T>::~CUPMEvent() noexcept
100: {
101:   if (event_) PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(std::move(event_));
102:   return;
103: }

105: template <DeviceType T>
106: inline CUPMEvent<T>::CUPMEvent(CUPMEvent &&other) noexcept : interface_type(std::move(other)), pool_type(std::move(other)), event_(util::exchange(other.event_, cupmEvent_t{}))
107: {
108: }

110: template <DeviceType T>
111: inline CUPMEvent<T> &CUPMEvent<T>::operator=(CUPMEvent &&other) noexcept
112: {
113:   if (this != &other) {
114:     interface_type::operator=(std::move(other));
115:     pool_type::     operator=(std::move(other));
116:     if (event_) PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(std::move(event_));
117:     event_ = util::exchange(other.event_, cupmEvent_t{});
118:   }
119:   PetscFunctionReturn(*this);
120: }

122: template <DeviceType T>
123: inline typename CUPMEvent<T>::cupmEvent_t CUPMEvent<T>::get() noexcept
124: {
125:   if (PetscUnlikely(!event_)) PETSC_COMM_SELF, cupm_fast_event_pool<T>().allocate(&event_);
126:   return event_;
127: }

129: template <DeviceType T>
130: inline PetscErrorCode CUPMEvent<T>::record(cupmStream_t stream) noexcept
131: {
132:   cupmEventRecord(get(), stream);
133:   return 0;
134: }

136: template <DeviceType T>
137: inline CUPMEvent<T>::operator bool() const noexcept
138: {
139:   return event_ != cupmEvent_t{};
140: }

142: } // namespace cupm

144: } // namespace device

146: } // namespace Petsc
147: #endif // __cplusplus

149: #endif // PETSC_CUPMEVENT_HPP