Actual source code: segmentedmempool.hpp
1: #ifndef PETSC_SEGMENTEDMEMPOOL_HPP
2: #define PETSC_SEGMENTEDMEMPOOL_HPP
4: #include <petsc/private/deviceimpl.h>
6: #include <petsc/private/cpp/macros.hpp>
7: #include <petsc/private/cpp/type_traits.hpp>
8: #include <petsc/private/cpp/utility.hpp>
9: #include <petsc/private/cpp/register_finalize.hpp>
10: #include <petsc/private/cpp/memory.hpp>
12: #include <limits>
13: #include <deque>
14: #include <vector>
16: namespace Petsc
17: {
19: namespace device
20: {
22: template <typename T>
23: class StreamBase {
24: public:
25: using id_type = int;
26: using derived_type = T;
28: static const id_type INVALID_ID;
30: // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion
31: template <typename U = T>
32: PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_());
34: PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); }
36: template <typename E>
37: PETSC_NODISCARD PetscErrorCode record_event(E &&event) const noexcept
38: {
39: return static_cast<const T &>(*this).record_event_(std::forward<E>(event));
40: }
42: template <typename E>
43: PETSC_NODISCARD PetscErrorCode wait_for_event(E &&event) const noexcept
44: {
45: return static_cast<const T &>(*this).wait_for_(std::forward<E>(event));
46: }
48: protected:
49: constexpr StreamBase() noexcept = default;
51: struct default_event_type { };
52: using default_stream_type = std::nullptr_t;
54: PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; }
56: PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; }
58: template <typename U = T>
59: PETSC_NODISCARD static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept
60: {
61: return 0;
62: }
64: template <typename U = T>
65: PETSC_NODISCARD static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept
66: {
67: return 0;
68: }
69: };
71: template <typename T>
72: const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1;
74: struct DefaultStream : StreamBase<DefaultStream> {
75: using stream_type = typename StreamBase<DefaultStream>::default_stream_type;
76: using id_type = typename StreamBase<DefaultStream>::id_type;
77: using event_type = typename StreamBase<DefaultStream>::default_event_type;
78: };
80: } // namespace device
82: namespace memory
83: {
85: namespace impl
86: {
88: // ==========================================================================================
89: // MemoryChunk
90: //
91: // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning
92: // MemoryBlock and its size/capacity
93: // ==========================================================================================
95: template <typename EventType>
96: class MemoryChunk {
97: public:
98: using event_type = EventType;
99: using size_type = std::size_t;
101: MemoryChunk(size_type, size_type) noexcept;
102: explicit MemoryChunk(size_type) noexcept;
104: MemoryChunk(MemoryChunk &&) noexcept;
105: MemoryChunk &operator=(MemoryChunk &&) noexcept;
107: MemoryChunk(const MemoryChunk &) noexcept = delete;
108: MemoryChunk &operator=(const MemoryChunk &) noexcept = delete;
110: PETSC_NODISCARD size_type start() const noexcept { return start_; }
111: PETSC_NODISCARD size_type size() const noexcept { return size_; }
112: // REVIEW ME:
113: // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in
114: // theory only the last chunk needs to do this
115: PETSC_NODISCARD size_type capacity() const noexcept { return size_; }
116: PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); }
118: template <typename U>
119: PETSC_NODISCARD PetscErrorCode release(const device::StreamBase<U> *) noexcept;
120: template <typename U>
121: PETSC_NODISCARD PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept;
122: template <typename U>
123: PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept;
124: PETSC_NODISCARD PetscErrorCode resize(size_type) noexcept;
125: PETSC_NODISCARD bool contains(size_type) const noexcept;
127: private:
128: // clang-format off
129: event_type event_{}; // event recorded when the chunk was released
130: bool open_ = true; // is this chunk open?
131: // id of the last stream to use the chunk, populated on release
132: int stream_id_ = device::DefaultStream::INVALID_ID;
133: size_type size_ = 0; // size of the chunk
134: const size_type start_ = 0; // offset from the start of the owning block
135: // clang-format on
137: template <typename U>
138: PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept;
139: };
141: // ==========================================================================================
142: // MemoryChunk - Private API
143: // ==========================================================================================
145: // asks and answers the question: can this stream claim this chunk without serializing?
146: template <typename E>
147: template <typename U>
148: inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept
149: {
150: return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id());
151: }
153: // ==========================================================================================
154: // MemoryChunk - Public API
155: // ==========================================================================================
157: template <typename E>
158: inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start)
159: {
160: }
162: template <typename E>
163: inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size)
164: {
165: }
167: template <typename E>
168: inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept :
169: event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_))
170: {
171: }
173: template <typename E>
174: inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept
175: {
176: if (this != &other) {
177: event_ = std::move(other.event_);
178: open_ = util::exchange(other.open_, false);
179: stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID);
180: size_ = util::exchange(other.size_, 0);
181: start_ = std::move(other.start_);
182: }
183: PetscFunctionReturn(*this);
184: }
186: /*
187: MemoryChunk::release - release a chunk on a stream
189: Input Parameter:
190: . stream - the stream to release the chunk with
192: Notes:
193: Inserts a release operation on stream and records the state of stream at the time this
194: routine was called.
196: Future allocation requests which attempt to claim the chunk on the same stream may re-acquire
197: the chunk without serialization.
199: If another stream attempts to claim the chunk they must wait for the recorded event before
200: claiming the chunk.
201: */
202: template <typename E>
203: template <typename U>
204: inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept
205: {
206: open_ = true;
207: stream_id_ = stream->get_id();
208: stream->record_event(event_);
209: return 0;
210: }
212: /*
213: MemoryChunk::claim - attempt to claim a particular chunk
215: Input Parameters:
216: + stream - the stream on which to attempt to claim
217: . req_size - the requested size (in elements) to attempt to claim
218: - serialize - (optional, false) whether the claimant allows serialization
220: Output Parameter:
221: . success - true if the chunk was claimed, false otherwise
222: */
223: template <typename E>
224: template <typename U>
225: inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept
226: {
227: if ((*success = can_claim(stream, req_size, serialize))) {
228: if (serialize && !stream_compat_(stream)) stream->wait_for_event(event_);
229: resize(req_size);
230: open_ = false;
231: }
232: return 0;
233: }
235: /*
236: MemoryChunk::can_claim - test whether a particular chunk can be claimed
238: Input Parameters:
239: + stream - the stream on which to attempt to claim
240: . req_size - the requested size (in elements) to attempt to claim
241: - serialize - whether the claimant allows serialization
243: Output:
244: . [return] - true if the chunk is claimable given the configuration, false otherwise
245: */
246: template <typename E>
247: template <typename U>
248: inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept
249: {
250: if (open_ && (req_size <= capacity())) {
251: // fully compatible
252: if (stream_compat_(stream)) return true;
253: // stream wasn't compatible, but could claim if we serialized
254: if (serialize) return true;
255: // incompatible stream and did not want to serialize
256: }
257: return false;
258: }
260: /*
261: MemoryChunk::resize - grow a chunk to new size
263: Input Parameter:
264: . newsize - the new size Requested
266: Notes:
267: newsize cannot be larger than capacity
268: */
269: template <typename E>
270: inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept
271: {
272: PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity());
273: size_ = newsize;
274: return 0;
275: }
277: /*
278: MemoryChunk::contains - query whether a memory chunk contains a particular offset
280: Input Parameters:
281: . offset - The offset from the MemoryBlock start
283: Notes:
284: Returns true if the chunk contains the offset, false otherwise
285: */
286: template <typename E>
287: inline bool MemoryChunk<E>::contains(size_type offset) const noexcept
288: {
289: return (offset >= start()) && (offset < total_offset());
290: }
292: // ==========================================================================================
293: // MemoryBlock
294: //
295: // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving
296: // and restoring a block is thread-safe (so may be used by multiple device streams).
297: // ==========================================================================================
299: template <typename T, typename AllocatorType, typename StreamType>
300: class MemoryBlock {
301: public:
302: using value_type = T;
303: using allocator_type = AllocatorType;
304: using stream_type = StreamType;
305: using event_type = typename stream_type::event_type;
306: using chunk_type = MemoryChunk<event_type>;
307: using size_type = typename chunk_type::size_type;
308: using chunk_list_type = std::vector<chunk_type>;
310: template <typename U>
311: MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept;
313: ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value);
315: MemoryBlock(MemoryBlock &&) noexcept;
316: MemoryBlock &operator=(MemoryBlock &&) noexcept;
318: // memory blocks are not copyable
319: MemoryBlock(const MemoryBlock &) = delete;
320: MemoryBlock &operator=(const MemoryBlock &) = delete;
322: /* --- actual functions --- */
323: PETSC_NODISCARD PetscErrorCode try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept;
324: PETSC_NODISCARD PetscErrorCode try_deallocate_chunk(T **, const stream_type *, bool *) noexcept;
325: PETSC_NODISCARD PetscErrorCode try_find_chunk(const T *, chunk_type **) noexcept;
326: PETSC_NODISCARD bool owns_pointer(const T *) const noexcept;
328: PETSC_NODISCARD size_type size() const noexcept { return size_; }
329: PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); }
330: PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); }
332: private:
333: value_type *mem_{};
334: allocator_type *allocator_{};
335: size_type size_{};
336: chunk_list_type chunks_{};
338: PETSC_NODISCARD PetscErrorCode clear_(const stream_type *) noexcept;
339: };
341: // ==========================================================================================
342: // MemoryBlock - Private API
343: // ==========================================================================================
345: // clear the memory block, called from destructors and move assignment/construction
346: template <typename T, typename A, typename S>
347: PETSC_NODISCARD PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept
348: {
349: if (PetscLikely(mem_)) {
350: allocator_->deallocate(mem_, stream);
351: mem_ = nullptr;
352: }
353: size_ = 0;
354: chunks_.clear();
355: return 0;
356: }
358: // ==========================================================================================
359: // MemoryBlock - Public API
360: // ==========================================================================================
362: // default constructor, allocates memory immediately
363: template <typename T, typename A, typename S>
364: template <typename U>
365: MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s)
366: {
367: PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream);
368: PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s);
369: return;
370: }
372: template <typename T, typename A, typename S>
373: MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value)
374: {
375: stream_type stream;
377: PETSC_COMM_SELF, clear_(&stream);
378: return;
379: }
381: template <typename T, typename A, typename S>
382: MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_))
383: {
384: }
386: template <typename T, typename A, typename S>
387: MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept
388: {
389: if (this != &other) {
390: stream_type stream;
392: PETSC_COMM_SELF, clear_(&stream);
393: mem_ = util::exchange(other.mem_, nullptr);
394: allocator_ = other.allocator_;
395: size_ = util::exchange(other.size_, 0);
396: chunks_ = std::move(other.chunks_);
397: }
398: PetscFunctionReturn(*this);
399: }
401: /*
402: MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise
403: */
404: template <typename T, typename A, typename S>
405: inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept
406: {
407: // each pool is linear in memory, so it suffices to check the bounds
408: return (ptr >= mem_) && (ptr < std::next(mem_, size()));
409: }
411: /*
412: MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock
414: Input Parameters:
415: + req_size - the requested size of the allocation (in elements)
416: . ptr - ptr to fill
417: - stream - stream to fill the pointer on
419: Output Parameter:
420: . success - true if chunk was gotten, false otherwise
422: Notes:
423: If the current memory could not satisfy the memory request, ptr is unchanged
424: */
425: template <typename T, typename A, typename S>
426: inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept
427: {
428: *success = false;
429: if (req_size <= size()) {
430: const auto try_create_chunk = [&]() {
431: const auto was_empty = chunks_.empty();
432: const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset();
434: if (block_alloced + req_size <= size()) {
435: chunks_.emplace_back(block_alloced, req_size);
436: chunks_.back().claim(stream, req_size, success);
437: *ptr = mem_ + block_alloced;
438: if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size());
439: }
440: return 0;
441: };
442: const auto try_find_open_chunk = [&](bool serialize = false) {
443: for (auto &chunk : chunks_) {
444: chunk.claim(stream, req_size, success, serialize);
445: if (*success) {
446: *ptr = mem_ + chunk.start();
447: break;
448: }
449: }
450: return 0;
451: };
452: const auto try_steal_other_stream_chunk = [&]() {
453: try_find_open_chunk(true);
454: return 0;
455: };
457: // search previously distributed chunks, but only claim one if it is on the same stream
458: // as us
459: try_find_open_chunk();
461: // if we are here we couldn't reuse one of our own chunks so check first if the pool
462: // has room for a new one
463: if (!*success) try_create_chunk();
465: // try pruning dead chunks off the back, note we do this regardless of whether we are
466: // successful
467: while (chunks_.back().can_claim(stream, 0, false)) {
468: chunks_.pop_back();
469: if (chunks_.empty()) {
470: // if chunks are empty it implies we have managed to claim (and subsequently destroy)
471: // our own chunk twice! something has gone wrong
472: PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size());
473: break;
474: }
475: }
477: // if previously unsuccessful see if enough space has opened up due to pruning. note that
478: // if the chunk list was emptied from the pruning this call must succeed in allocating a
479: // chunk, otherwise something is wrong
480: if (!*success) try_create_chunk();
482: // last resort, iterate over all chunks and see if we can steal one by waiting on the
483: // current owner to finish using it
484: if (!*success) try_steal_other_stream_chunk();
485: }
486: return 0;
487: }
489: /*
490: MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock
492: Input Parameters:
493: + ptr - ptr to restore
494: - stream - stream to restore the pointer on
496: Output Parameter:
497: . success - true if chunk was restored, false otherwise
499: Notes:
500: ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned
501: by this MemoryBlock then it is restored on stream. The same stream may receive ptr again
502: without synchronization, but other streams may not do so until either serializing or the
503: stream is idle again.
504: */
505: template <typename T, typename A, typename S>
506: inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept
507: {
508: chunk_type *chunk = nullptr;
510: try_find_chunk(*ptr, &chunk);
511: if (chunk) {
512: chunk->release(stream);
513: *ptr = nullptr;
514: *success = true;
515: } else {
516: *success = false;
517: }
518: return 0;
519: }
521: /*
522: MemoryBlock::try_find_chunk - try to find the chunk which owns ptr
524: Input Parameter:
525: . ptr - the pointer to lookk for
527: Output Parameter:
528: . ret_chunk - pointer to the owning chunk or nullptr if not found
529: */
530: template <typename T, typename A, typename S>
531: inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept
532: {
533: *ret_chunk = nullptr;
534: if (owns_pointer(ptr)) {
535: const auto offset = static_cast<size_type>(ptr - mem_);
537: for (auto &chunk : chunks_) {
538: if (chunk.contains(offset)) {
539: *ret_chunk = &chunk;
540: break;
541: }
542: }
544: PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size())));
545: }
546: return 0;
547: }
549: namespace detail
550: {
552: template <typename T>
553: struct real_type {
554: using type = T;
555: };
557: template <>
558: struct real_type<PetscScalar> {
559: using type = PetscReal;
560: };
562: } // namespace detail
564: template <typename T>
565: struct SegmentedMemoryPoolAllocatorBase {
566: using value_type = T;
567: using size_type = std::size_t;
568: using real_value_type = typename detail::real_type<T>::type;
570: template <typename U>
571: PETSC_NODISCARD static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept;
572: template <typename U>
573: PETSC_NODISCARD static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept;
574: template <typename U>
575: PETSC_NODISCARD static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept;
576: template <typename U>
577: PETSC_NODISCARD static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept;
578: template <typename U>
579: PETSC_NODISCARD static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept;
580: };
582: template <typename T>
583: template <typename U>
584: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept
585: {
586: PetscMalloc1(n, ptr);
587: return 0;
588: }
590: template <typename T>
591: template <typename U>
592: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept
593: {
594: PetscFree(ptr);
595: return 0;
596: }
598: template <typename T>
599: template <typename U>
600: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
601: {
602: PetscArrayzero(ptr, n);
603: return 0;
604: }
606: template <typename T>
607: template <typename U>
608: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept
609: {
610: PetscArraycpy(dest, src, n);
611: return 0;
612: }
614: template <typename T>
615: template <typename U>
616: inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
617: {
618: using limit_type = std::numeric_limits<real_value_type>;
619: constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max();
621: for (size_type i = 0; i < n; ++i) ptr[i] = canary;
622: return 0;
623: }
625: } // namespace impl
627: // ==========================================================================================
628: // SegmentedMemoryPool
629: //
630: // Stream-aware async memory allocator. Holds a list of memory "blocks" which each control an
631: // allocated buffer. This buffer is further split into memory "chunks" which control
632: // consecutive, non-overlapping regions of the block. Chunks may be in 1 of 2 states:
633: //
634: // 1. Open:
635: // The chunk is free to be claimed by the next suitable allocation request. If the
636: // allocation request is made on the same stream as the chunk was deallocated on, no
637: // serialization needs to occur. If not, the allocating stream must wait for the
638: // event. Claiming the chunk "closes" the chunk.
639: //
640: // 2. Closed:
641: // The chunk has been claimed by an allocation request. It cannot be opened again until it
642: // is deallocated; doing so "opens" the chunk.
643: //
644: // Note that there does not need to be a chunk for every region, chunks are created to satisfy
645: // an allocation request.
646: //
647: // Thus there is usually a region of "unallocated" memory at the end of the buffer, which may
648: // be claimed by a newly created chunk if existing chunks cannot satisfy the allocation
649: // request. This region exists _only_ at the end, as there are no gaps between chunks.
650: //
651: //
652: // |-----------------------------------------------------------------------------------------
653: // | SegmentedMemoryPool
654: // |
655: // | ||-------------||
656: // | || || -------------------------------------------------------------------
657: // | || || | AAAAAAAAAAAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDXXXXXXXX...
658: // | || || | | | | | |
659: // | || || | x-----x-------x-----xx---------x---------x------x-----x
660: // | || MemoryBlock || -> | ------|-------------|----------|----------------|--------
661: // | || || | | MemoryChunk | MemoryChunk | MemoryChunk | MemoryChunk |
662: // | || || | ---------------------------------------------------------
663: // | || || -------------------------------------------------------------------
664: // | ||-------------||
665: // | || ||
666: // | || ... ||
667: // | || ||
668: // ==========================================================================================
670: template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256>
671: class SegmentedMemoryPool;
673: // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks.
674: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
675: class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> {
676: public:
677: using value_type = MemType;
678: using stream_type = StreamType;
679: using allocator_type = AllocType;
680: using block_type = impl::MemoryBlock<value_type, allocator_type, stream_type>;
681: using pool_type = std::deque<block_type>;
682: using size_type = typename block_type::size_type;
684: explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value);
686: PETSC_NODISCARD PetscErrorCode allocate(PetscInt, value_type **, const stream_type *, size_type = std::alignment_of<MemType>::value) noexcept;
687: PETSC_NODISCARD PetscErrorCode deallocate(value_type **, const stream_type *) noexcept;
688: PETSC_NODISCARD PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept;
690: private:
691: pool_type pool_;
692: allocator_type allocator_;
693: size_type chunk_size_;
695: PETSC_NODISCARD PetscErrorCode make_block_(size_type, const stream_type *) noexcept;
697: friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>;
698: PETSC_NODISCARD PetscErrorCode register_finalize_(const stream_type *) noexcept;
699: PETSC_NODISCARD PetscErrorCode finalize_() noexcept;
701: PETSC_NODISCARD PetscErrorCode allocate_(size_type, value_type **, const stream_type *) noexcept;
702: };
704: // ==========================================================================================
705: // SegmentedMemoryPool - Private API
706: // ==========================================================================================
708: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
709: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept
710: {
711: const auto block_size = std::max(size, chunk_size_);
713: pool_.emplace_back(&allocator_, block_size, stream);
714: PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size());
715: return 0;
716: }
718: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
719: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept
720: {
721: make_block_(chunk_size_, stream);
722: return 0;
723: }
725: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
726: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept
727: {
728: pool_.clear();
729: chunk_size_ = DefaultChunkSize;
730: return 0;
731: }
733: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
734: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate_(size_type size, value_type **ptr, const stream_type *stream) noexcept
735: {
736: auto found = false;
738: this->register_finalize(stream);
739: for (auto &block : pool_) {
740: block.try_allocate_chunk(size, ptr, stream, &found);
741: if (PetscLikely(found)) return 0;
742: }
744: PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size);
745: // if we are here we couldn't find an open block in the pool, so make a new block
746: make_block_(size, stream);
747: // and assign it
748: pool_.back().try_allocate_chunk(size, ptr, stream, &found);
749: PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size());
750: return 0;
751: }
753: // ==========================================================================================
754: // SegmentedMemoryPool - Public API
755: // ==========================================================================================
757: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
758: inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : allocator_(std::move(alloc)), chunk_size_(size)
759: {
760: }
762: /*
763: SegmentedMemoryPool::allocate - get an allocation from the memory pool
765: Input Parameters:
766: + req_size - size (in elements) to get
767: . ptr - the pointer to hold the allocation
768: - stream - the stream on which to get the allocation
770: Output Parameter:
771: . ptr - the pointer holding the allocation
773: Notes:
774: req_size cannot be negative. If req_size if zero, ptr is set to nullptr
775: */
776: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
777: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream, size_type alignment) noexcept
778: {
779: value_type *ret_ptr = nullptr;
781: PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size);
784: if (req_size) {
785: const auto size = static_cast<size_type>(req_size);
786: auto aligned_size = alignment == alignof(char) ? size : size + alignment;
787: void *vptr = nullptr;
789: allocate_(aligned_size, &ret_ptr, stream);
790: vptr = ret_ptr;
791: std::align(alignment, size, vptr, aligned_size);
792: ret_ptr = reinterpret_cast<value_type *>(vptr);
793: // sets memory to NaN or infinity depending on the type to catch out uninitialized memory
794: // accesses.
795: if (PetscDefined(USE_DEBUG)) allocator_.set_canary(ret_ptr, size, stream);
796: }
797: *ptr = ret_ptr;
798: return 0;
799: }
801: /*
802: SegmentedMemoryPool::deallocate - release a pointer back to the memory pool
804: Input Parameters:
805: + ptr - the pointer to release
806: - stream - the stream to release it on
808: Notes:
809: If ptr is not owned by the pool it is unchanged.
810: */
811: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
812: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept
813: {
816: // nobody owns a nullptr, and if they do then they have bigger problems
817: if (!*ptr) return 0;
818: for (auto &block : pool_) {
819: auto found = false;
821: block.try_deallocate_chunk(ptr, stream, &found);
822: if (PetscLikely(found)) break;
823: }
824: return 0;
825: }
827: /*
828: SegmentedMemoryPool::reallocate - Resize an allocated buffer
830: Input Parameters:
831: + new_req_size - the new buffer size
832: . ptr - pointer to the buffer
833: - stream - stream to resize with
835: Output Parameter:
836: . ptr - pointer to the new region
838: Notes:
839: ptr must have been allocated by the pool.
841: It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated).
842: */
843: template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
844: inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept
845: {
846: using chunk_type = typename block_type::chunk_type;
848: const auto new_size = static_cast<size_type>(new_req_size);
849: const auto old_ptr = *ptr;
850: chunk_type *chunk = nullptr;
852: PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size);
856: // if reallocating to zero, just free
857: if (PetscUnlikely(new_size == 0)) {
858: deallocate(ptr, stream);
859: return 0;
860: }
862: // search the blocks for the owning chunk
863: for (auto &block : pool_) {
864: block.try_find_chunk(old_ptr, &chunk);
865: if (chunk) break; // found
866: }
867: PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr);
869: if (chunk->capacity() < new_size) {
870: // chunk does not have enough room, need to grab a fresh chunk and copy to it
871: *ptr = nullptr;
872: chunk->release(stream);
873: allocate(new_size, ptr, stream);
874: allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream);
875: } else {
876: // chunk had enough room we can simply grow (or shrink) to fit the new size
877: chunk->resize(new_size);
878: }
879: return 0;
880: }
882: } // namespace memory
884: } // namespace Petsc
886: #endif // PETSC_SEGMENTEDMEMPOOL_HPP