Actual source code: cupmallocator.hpp
1: #ifndef CUPMALLOCATOR_HPP
2: #define CUPMALLOCATOR_HPP
4: #if defined(__cplusplus)
5: #include <petsc/private/cpp/object_pool.hpp>
7: #include "../segmentedmempool.hpp"
8: #include "cupmthrustutility.hpp"
10: #include <limits> // std::numeric_limits
12: namespace Petsc
13: {
15: namespace device
16: {
18: namespace cupm
19: {
21: // ==========================================================================================
22: // CUPM Host Allocator
23: // ==========================================================================================
25: template <DeviceType T, typename PetscType = char>
26: class HostAllocator;
28: // Allocator class to allocate pinned host memory for use with device
29: template <DeviceType T, typename PetscType>
30: class HostAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>, impl::Interface<T> {
31: public:
32: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(interface_type, T);
33: using base_type = memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>;
34: using real_value_type = typename base_type::real_value_type;
35: using size_type = typename base_type::size_type;
36: using value_type = typename base_type::value_type;
38: template <typename U>
39: PETSC_NODISCARD static PetscErrorCode allocate(value_type **, size_type, const StreamBase<U> *) noexcept;
40: template <typename U>
41: PETSC_NODISCARD static PetscErrorCode deallocate(value_type *, const StreamBase<U> *) noexcept;
42: template <typename U>
43: PETSC_NODISCARD static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase<U> *) noexcept;
44: };
46: template <DeviceType T, typename P>
47: template <typename U>
48: inline PetscErrorCode HostAllocator<T, P>::allocate(value_type **ptr, size_type n, const StreamBase<U> *) noexcept
49: {
50: PetscCUPMMallocHost(ptr, n);
51: return 0;
52: }
54: template <DeviceType T, typename P>
55: template <typename U>
56: inline PetscErrorCode HostAllocator<T, P>::deallocate(value_type *ptr, const StreamBase<U> *) noexcept
57: {
58: cupmFreeHost(ptr);
59: return 0;
60: }
62: template <DeviceType T, typename P>
63: template <typename U>
64: inline PetscErrorCode HostAllocator<T, P>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase<U> *stream) noexcept
65: {
66: PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyHostToHost, stream->get_stream(), true);
67: return 0;
68: }
70: // ==========================================================================================
71: // CUPM Device Allocator
72: // ==========================================================================================
74: template <DeviceType T, typename PetscType = char>
75: class DeviceAllocator;
77: template <DeviceType T, typename PetscType>
78: class DeviceAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>, impl::Interface<T> {
79: public:
80: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(interface_type, T);
81: using base_type = memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>;
82: using real_value_type = typename base_type::real_value_type;
83: using size_type = typename base_type::size_type;
84: using value_type = typename base_type::value_type;
86: template <typename U>
87: PETSC_NODISCARD static PetscErrorCode allocate(value_type **, size_type, const StreamBase<U> *) noexcept;
88: template <typename U>
89: PETSC_NODISCARD static PetscErrorCode deallocate(value_type *, const StreamBase<U> *) noexcept;
90: template <typename U>
91: PETSC_NODISCARD static PetscErrorCode zero(value_type *, size_type, const StreamBase<U> *) noexcept;
92: template <typename U>
93: PETSC_NODISCARD static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase<U> *) noexcept;
94: template <typename U>
95: PETSC_NODISCARD static PetscErrorCode set_canary(value_type *, size_type, const StreamBase<U> *) noexcept;
96: };
98: template <DeviceType T, typename P>
99: template <typename U>
100: inline PetscErrorCode DeviceAllocator<T, P>::allocate(value_type **ptr, size_type n, const StreamBase<U> *stream) noexcept
101: {
102: PetscCUPMMallocAsync(ptr, n, stream->get_stream());
103: return 0;
104: }
106: template <DeviceType T, typename P>
107: template <typename U>
108: inline PetscErrorCode DeviceAllocator<T, P>::deallocate(value_type *ptr, const StreamBase<U> *stream) noexcept
109: {
110: cupmFreeAsync(ptr, stream->get_stream());
111: return 0;
112: }
114: template <DeviceType T, typename P>
115: template <typename U>
116: inline PetscErrorCode DeviceAllocator<T, P>::zero(value_type *ptr, size_type n, const StreamBase<U> *stream) noexcept
117: {
118: PetscCUPMMemsetAsync(ptr, 0, n, stream->get_stream(), true);
119: return 0;
120: }
122: template <DeviceType T, typename P>
123: template <typename U>
124: inline PetscErrorCode DeviceAllocator<T, P>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase<U> *stream) noexcept
125: {
126: PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyDeviceToDevice, stream->get_stream(), true);
127: return 0;
128: }
130: template <DeviceType T, typename P>
131: template <typename U>
132: inline PetscErrorCode DeviceAllocator<T, P>::set_canary(value_type *ptr, size_type n, const StreamBase<U> *stream) noexcept
133: {
134: using limit_t = std::numeric_limits<real_value_type>;
135: const value_type canary = limit_t::has_signaling_NaN ? limit_t::signaling_NaN() : limit_t::max();
137: impl::ThrustSet<T>(stream->get_stream(), n, ptr, &canary);
138: return 0;
139: }
141: } // namespace cupm
143: } // namespace device
145: } // namespace Petsc
147: #endif // __cplusplus
149: #endif // CUPMALLOCATOR_HPP