1 | /*************************************************************************** |
2 | * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * |
3 | * Martin Renou * |
4 | * Copyright (c) QuantStack * |
5 | * Copyright (c) Serge Guelton * |
6 | * * |
7 | * Distributed under the terms of the BSD 3-Clause License. * |
8 | * * |
9 | * The full license is in the file LICENSE, distributed with this software. * |
10 | ****************************************************************************/ |
11 | |
12 | #ifndef XSIMD_ALIGNED_ALLOCATOR_HPP |
13 | #define XSIMD_ALIGNED_ALLOCATOR_HPP |
14 | |
15 | #include <algorithm> |
16 | #include <cstddef> |
17 | #include <utility> |
18 | #ifdef _WIN32 |
19 | #include <malloc.h> |
20 | #else |
21 | #include <cstdlib> |
22 | #endif |
23 | |
24 | #include <cassert> |
25 | #include <memory> |
26 | |
27 | #include "../config/xsimd_arch.hpp" |
28 | |
29 | namespace xsimd |
30 | { |
31 | |
32 | /** |
33 | * @class aligned_allocator |
34 | * @brief Allocator for aligned memory |
35 | * |
36 | * The aligned_allocator class template is an allocator that |
37 | * performs memory allocation aligned by the specified value. |
38 | * |
39 | * @tparam T type of objects to allocate. |
40 | * @tparam Align alignment in bytes. |
41 | */ |
42 | template <class T, size_t Align = default_arch::alignment()> |
43 | class aligned_allocator |
44 | { |
45 | public: |
46 | using value_type = T; |
47 | using pointer = T*; |
48 | using const_pointer = const T*; |
49 | using reference = T&; |
50 | using const_reference = const T&; |
51 | using size_type = size_t; |
52 | using difference_type = ptrdiff_t; |
53 | |
54 | static constexpr size_t alignment = Align; |
55 | |
56 | template <class U> |
57 | struct rebind |
58 | { |
59 | using other = aligned_allocator<U, Align>; |
60 | }; |
61 | |
62 | aligned_allocator() noexcept; |
63 | aligned_allocator(const aligned_allocator& rhs) noexcept; |
64 | |
65 | template <class U> |
66 | aligned_allocator(const aligned_allocator<U, Align>& rhs) noexcept; |
67 | |
68 | ~aligned_allocator(); |
69 | |
70 | pointer address(reference) noexcept; |
71 | const_pointer address(const_reference) const noexcept; |
72 | |
73 | pointer allocate(size_type n, const void* hint = 0); |
74 | void deallocate(pointer p, size_type n); |
75 | |
76 | size_type max_size() const noexcept; |
77 | size_type size_max() const noexcept; |
78 | |
79 | template <class U, class... Args> |
80 | void construct(U* p, Args&&... args); |
81 | |
82 | template <class U> |
83 | void destroy(U* p); |
84 | }; |
85 | |
86 | template <class T1, size_t Align1, class T2, size_t Align2> |
87 | bool operator==(const aligned_allocator<T1, Align1>& lhs, |
88 | const aligned_allocator<T2, Align2>& rhs) noexcept; |
89 | |
90 | template <class T1, size_t Align1, class T2, size_t Align2> |
91 | bool operator!=(const aligned_allocator<T1, Align1>& lhs, |
92 | const aligned_allocator<T2, Align2>& rhs) noexcept; |
93 | |
94 | void* aligned_malloc(size_t size, size_t alignment); |
95 | void aligned_free(void* ptr); |
96 | |
97 | template <class T> |
98 | size_t get_alignment_offset(const T* p, size_t size, size_t block_size); |
99 | |
100 | /************************************ |
101 | * aligned_allocator implementation * |
102 | ************************************/ |
103 | |
104 | /** |
105 | * Default constructor. |
106 | */ |
107 | template <class T, size_t A> |
108 | inline aligned_allocator<T, A>::aligned_allocator() noexcept |
109 | { |
110 | } |
111 | |
112 | /** |
113 | * Copy constructor. |
114 | */ |
115 | template <class T, size_t A> |
116 | inline aligned_allocator<T, A>::aligned_allocator(const aligned_allocator&) noexcept |
117 | { |
118 | } |
119 | |
120 | /** |
121 | * Extended copy constructor. |
122 | */ |
123 | template <class T, size_t A> |
124 | template <class U> |
125 | inline aligned_allocator<T, A>::aligned_allocator(const aligned_allocator<U, A>&) noexcept |
126 | { |
127 | } |
128 | |
129 | /** |
130 | * Destructor. |
131 | */ |
132 | template <class T, size_t A> |
133 | inline aligned_allocator<T, A>::~aligned_allocator() |
134 | { |
135 | } |
136 | |
137 | /** |
138 | * Returns the actual address of \c r even in presence of overloaded \c operator&. |
139 | * @param r the object to acquire address of. |
140 | * @return the actual address of \c r. |
141 | */ |
142 | template <class T, size_t A> |
143 | inline auto |
144 | aligned_allocator<T, A>::address(reference r) noexcept -> pointer |
145 | { |
146 | return &r; |
147 | } |
148 | |
149 | /** |
150 | * Returns the actual address of \c r even in presence of overloaded \c operator&. |
151 | * @param r the object to acquire address of. |
152 | * @return the actual address of \c r. |
153 | */ |
154 | template <class T, size_t A> |
155 | inline auto |
156 | aligned_allocator<T, A>::address(const_reference r) const noexcept -> const_pointer |
157 | { |
158 | return &r; |
159 | } |
160 | |
161 | /** |
162 | * Allocates <tt>n * sizeof(T)</tt> bytes of uninitialized memory, aligned by \c A. |
163 | * The alignment may require some extra memory allocation. |
164 | * @param n the number of objects to allocate storage for. |
165 | * @param hint unused parameter provided for standard compliance. |
166 | * @return a pointer to the first byte of a memory block suitably aligned and sufficient to |
167 | * hold an array of \c n objects of type \c T. |
168 | */ |
169 | template <class T, size_t A> |
170 | inline auto |
171 | aligned_allocator<T, A>::allocate(size_type n, const void*) -> pointer |
172 | { |
173 | pointer res = reinterpret_cast<pointer>(aligned_malloc(size: sizeof(T) * n, alignment: A)); |
174 | #if defined(_CPPUNWIND) || defined(__cpp_exceptions) |
175 | if (res == nullptr) |
176 | throw std::bad_alloc(); |
177 | #endif |
178 | return res; |
179 | } |
180 | |
181 | /** |
182 | * Deallocates the storage referenced by the pointer p, which must be a pointer obtained by |
183 | * an earlier call to allocate(). The argument \c n must be equal to the first argument of the call |
184 | * to allocate() that originally produced \c p; otherwise, the behavior is undefined. |
185 | * @param p pointer obtained from allocate(). |
186 | * @param n number of objects earlier passed to allocate(). |
187 | */ |
188 | template <class T, size_t A> |
189 | inline void aligned_allocator<T, A>::deallocate(pointer p, size_type) |
190 | { |
191 | aligned_free(p); |
192 | } |
193 | |
194 | /** |
195 | * Returns the maximum theoretically possible value of \c n, for which the |
196 | * call allocate(n, 0) could succeed. |
197 | * @return the maximum supported allocated size. |
198 | */ |
199 | template <class T, size_t A> |
200 | inline auto |
201 | aligned_allocator<T, A>::max_size() const noexcept -> size_type |
202 | { |
203 | return size_type(-1) / sizeof(T); |
204 | } |
205 | |
206 | /** |
207 | * This method is deprecated, use max_size() instead |
208 | */ |
209 | template <class T, size_t A> |
210 | inline auto |
211 | aligned_allocator<T, A>::size_max() const noexcept -> size_type |
212 | { |
213 | return size_type(-1) / sizeof(T); |
214 | } |
215 | |
216 | /** |
217 | * Constructs an object of type \c T in allocated uninitialized memory |
218 | * pointed to by \c p, using placement-new. |
219 | * @param p pointer to allocated uninitialized memory. |
220 | * @param args the constructor arguments to use. |
221 | */ |
222 | template <class T, size_t A> |
223 | template <class U, class... Args> |
224 | inline void aligned_allocator<T, A>::construct(U* p, Args&&... args) |
225 | { |
226 | new ((void*)p) U(std::forward<Args>(args)...); |
227 | } |
228 | |
229 | /** |
230 | * Calls the destructor of the object pointed to by \c p. |
231 | * @param p pointer to the object that is going to be destroyed. |
232 | */ |
233 | template <class T, size_t A> |
234 | template <class U> |
235 | inline void aligned_allocator<T, A>::destroy(U* p) |
236 | { |
237 | p->~U(); |
238 | } |
239 | |
240 | /** |
241 | * @defgroup allocator_comparison Comparison operators |
242 | */ |
243 | |
244 | /** |
245 | * @ingroup allocator_comparison |
246 | * Compares two aligned memory allocator for equality. Since allocators |
247 | * are stateless, return \c true iff <tt>A1 == A2</tt>. |
248 | * @param lhs aligned_allocator to compare. |
249 | * @param rhs aligned_allocator to compare. |
250 | * @return true if the allocators have the same alignment. |
251 | */ |
252 | template <class T1, size_t A1, class T2, size_t A2> |
253 | inline bool operator==(const aligned_allocator<T1, A1>& lhs, |
254 | const aligned_allocator<T2, A2>& rhs) noexcept |
255 | { |
256 | return lhs.alignment == rhs.alignment; |
257 | } |
258 | |
259 | /** |
260 | * @ingroup allocator_comparison |
261 | * Compares two aligned memory allocator for inequality. Since allocators |
262 | * are stateless, return \c true iff <tt>A1 != A2</tt>. |
263 | * @param lhs aligned_allocator to compare. |
264 | * @param rhs aligned_allocator to compare. |
265 | * @return true if the allocators have different alignments. |
266 | */ |
267 | template <class T1, size_t A1, class T2, size_t A2> |
268 | inline bool operator!=(const aligned_allocator<T1, A1>& lhs, |
269 | const aligned_allocator<T2, A2>& rhs) noexcept |
270 | { |
271 | return !(lhs == rhs); |
272 | } |
273 | |
274 | /**************************************** |
275 | * aligned malloc / free implementation * |
276 | ****************************************/ |
277 | |
278 | namespace detail |
279 | { |
280 | inline void* xaligned_malloc(size_t size, size_t alignment) |
281 | { |
282 | assert(((alignment & (alignment - 1)) == 0) && "alignment must be a power of two" ); |
283 | assert((alignment >= sizeof(void*)) && "alignment must be at least the size of a pointer" ); |
284 | void* res = nullptr; |
285 | #ifdef _WIN32 |
286 | res = _aligned_malloc(size, alignment); |
287 | #else |
288 | if (posix_memalign(memptr: &res, alignment, size) != 0) |
289 | { |
290 | res = nullptr; |
291 | } |
292 | #endif |
293 | return res; |
294 | } |
295 | |
296 | inline void xaligned_free(void* ptr) |
297 | { |
298 | #ifdef _WIN32 |
299 | _aligned_free(ptr); |
300 | #else |
301 | free(ptr); |
302 | #endif |
303 | } |
304 | } |
305 | |
306 | inline void* aligned_malloc(size_t size, size_t alignment) |
307 | { |
308 | return detail::xaligned_malloc(size, alignment); |
309 | } |
310 | |
311 | inline void aligned_free(void* ptr) |
312 | { |
313 | detail::xaligned_free(ptr); |
314 | } |
315 | |
316 | template <class T> |
317 | inline size_t get_alignment_offset(const T* p, size_t size, size_t block_size) |
318 | { |
319 | // size_t block_size = simd_traits<T>::size; |
320 | if (block_size == 1) |
321 | { |
322 | // The simd_block consists of exactly one scalar so that all |
323 | // elements of the array |
324 | // are "well" aligned. |
325 | return 0; |
326 | } |
327 | else if (size_t(p) & (sizeof(T) - 1)) |
328 | { |
329 | // The array is not aligned to the size of a single element, so that |
330 | // no element |
331 | // of the array is well aligned |
332 | return size; |
333 | } |
334 | else |
335 | { |
336 | size_t block_mask = block_size - 1; |
337 | return std::min<size_t>( |
338 | a: (block_size - ((size_t(p) / sizeof(T)) & block_mask)) & block_mask, |
339 | b: size); |
340 | } |
341 | } |
342 | |
343 | template <class T, class A = default_arch> |
344 | using default_allocator = typename std::conditional<A::requires_alignment(), |
345 | aligned_allocator<T, A::alignment()>, |
346 | std::allocator<T>>::type; |
347 | } |
348 | |
349 | #endif |
350 | |