1/***************************************************************************
2 * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3 * Martin Renou *
4 * Copyright (c) QuantStack *
5 * Copyright (c) Serge Guelton *
6 * *
7 * Distributed under the terms of the BSD 3-Clause License. *
8 * *
9 * The full license is in the file LICENSE, distributed with this software. *
10 ****************************************************************************/
11
12#ifndef XSIMD_ALIGNED_ALLOCATOR_HPP
13#define XSIMD_ALIGNED_ALLOCATOR_HPP
14
15#include <algorithm>
16#include <cstddef>
17#include <utility>
18#ifdef _WIN32
19#include <malloc.h>
20#else
21#include <cstdlib>
22#endif
23
24#include <cassert>
25#include <memory>
26
27#include "../config/xsimd_arch.hpp"
28
29namespace xsimd
30{
31
32 /**
33 * @class aligned_allocator
34 * @brief Allocator for aligned memory
35 *
36 * The aligned_allocator class template is an allocator that
37 * performs memory allocation aligned by the specified value.
38 *
39 * @tparam T type of objects to allocate.
40 * @tparam Align alignment in bytes.
41 */
42 template <class T, size_t Align = default_arch::alignment()>
43 class aligned_allocator
44 {
45 public:
46 using value_type = T;
47 using pointer = T*;
48 using const_pointer = const T*;
49 using reference = T&;
50 using const_reference = const T&;
51 using size_type = size_t;
52 using difference_type = ptrdiff_t;
53
54 static constexpr size_t alignment = Align;
55
56 template <class U>
57 struct rebind
58 {
59 using other = aligned_allocator<U, Align>;
60 };
61
62 aligned_allocator() noexcept;
63 aligned_allocator(const aligned_allocator& rhs) noexcept;
64
65 template <class U>
66 aligned_allocator(const aligned_allocator<U, Align>& rhs) noexcept;
67
68 ~aligned_allocator();
69
70 pointer address(reference) noexcept;
71 const_pointer address(const_reference) const noexcept;
72
73 pointer allocate(size_type n, const void* hint = 0);
74 void deallocate(pointer p, size_type n);
75
76 size_type max_size() const noexcept;
77 size_type size_max() const noexcept;
78
79 template <class U, class... Args>
80 void construct(U* p, Args&&... args);
81
82 template <class U>
83 void destroy(U* p);
84 };
85
86 template <class T1, size_t Align1, class T2, size_t Align2>
87 bool operator==(const aligned_allocator<T1, Align1>& lhs,
88 const aligned_allocator<T2, Align2>& rhs) noexcept;
89
90 template <class T1, size_t Align1, class T2, size_t Align2>
91 bool operator!=(const aligned_allocator<T1, Align1>& lhs,
92 const aligned_allocator<T2, Align2>& rhs) noexcept;
93
94 void* aligned_malloc(size_t size, size_t alignment);
95 void aligned_free(void* ptr);
96
97 template <class T>
98 size_t get_alignment_offset(const T* p, size_t size, size_t block_size);
99
100 /************************************
101 * aligned_allocator implementation *
102 ************************************/
103
104 /**
105 * Default constructor.
106 */
107 template <class T, size_t A>
108 inline aligned_allocator<T, A>::aligned_allocator() noexcept
109 {
110 }
111
112 /**
113 * Copy constructor.
114 */
115 template <class T, size_t A>
116 inline aligned_allocator<T, A>::aligned_allocator(const aligned_allocator&) noexcept
117 {
118 }
119
120 /**
121 * Extended copy constructor.
122 */
123 template <class T, size_t A>
124 template <class U>
125 inline aligned_allocator<T, A>::aligned_allocator(const aligned_allocator<U, A>&) noexcept
126 {
127 }
128
129 /**
130 * Destructor.
131 */
132 template <class T, size_t A>
133 inline aligned_allocator<T, A>::~aligned_allocator()
134 {
135 }
136
137 /**
138 * Returns the actual address of \c r even in presence of overloaded \c operator&.
139 * @param r the object to acquire address of.
140 * @return the actual address of \c r.
141 */
142 template <class T, size_t A>
143 inline auto
144 aligned_allocator<T, A>::address(reference r) noexcept -> pointer
145 {
146 return &r;
147 }
148
149 /**
150 * Returns the actual address of \c r even in presence of overloaded \c operator&.
151 * @param r the object to acquire address of.
152 * @return the actual address of \c r.
153 */
154 template <class T, size_t A>
155 inline auto
156 aligned_allocator<T, A>::address(const_reference r) const noexcept -> const_pointer
157 {
158 return &r;
159 }
160
161 /**
162 * Allocates <tt>n * sizeof(T)</tt> bytes of uninitialized memory, aligned by \c A.
163 * The alignment may require some extra memory allocation.
164 * @param n the number of objects to allocate storage for.
165 * @param hint unused parameter provided for standard compliance.
166 * @return a pointer to the first byte of a memory block suitably aligned and sufficient to
167 * hold an array of \c n objects of type \c T.
168 */
169 template <class T, size_t A>
170 inline auto
171 aligned_allocator<T, A>::allocate(size_type n, const void*) -> pointer
172 {
173 pointer res = reinterpret_cast<pointer>(aligned_malloc(size: sizeof(T) * n, alignment: A));
174#if defined(_CPPUNWIND) || defined(__cpp_exceptions)
175 if (res == nullptr)
176 throw std::bad_alloc();
177#endif
178 return res;
179 }
180
181 /**
182 * Deallocates the storage referenced by the pointer p, which must be a pointer obtained by
183 * an earlier call to allocate(). The argument \c n must be equal to the first argument of the call
184 * to allocate() that originally produced \c p; otherwise, the behavior is undefined.
185 * @param p pointer obtained from allocate().
186 * @param n number of objects earlier passed to allocate().
187 */
188 template <class T, size_t A>
189 inline void aligned_allocator<T, A>::deallocate(pointer p, size_type)
190 {
191 aligned_free(p);
192 }
193
194 /**
195 * Returns the maximum theoretically possible value of \c n, for which the
196 * call allocate(n, 0) could succeed.
197 * @return the maximum supported allocated size.
198 */
199 template <class T, size_t A>
200 inline auto
201 aligned_allocator<T, A>::max_size() const noexcept -> size_type
202 {
203 return size_type(-1) / sizeof(T);
204 }
205
206 /**
207 * This method is deprecated, use max_size() instead
208 */
209 template <class T, size_t A>
210 inline auto
211 aligned_allocator<T, A>::size_max() const noexcept -> size_type
212 {
213 return size_type(-1) / sizeof(T);
214 }
215
216 /**
217 * Constructs an object of type \c T in allocated uninitialized memory
218 * pointed to by \c p, using placement-new.
219 * @param p pointer to allocated uninitialized memory.
220 * @param args the constructor arguments to use.
221 */
222 template <class T, size_t A>
223 template <class U, class... Args>
224 inline void aligned_allocator<T, A>::construct(U* p, Args&&... args)
225 {
226 new ((void*)p) U(std::forward<Args>(args)...);
227 }
228
229 /**
230 * Calls the destructor of the object pointed to by \c p.
231 * @param p pointer to the object that is going to be destroyed.
232 */
233 template <class T, size_t A>
234 template <class U>
235 inline void aligned_allocator<T, A>::destroy(U* p)
236 {
237 p->~U();
238 }
239
240 /**
241 * @defgroup allocator_comparison Comparison operators
242 */
243
244 /**
245 * @ingroup allocator_comparison
246 * Compares two aligned memory allocator for equality. Since allocators
247 * are stateless, return \c true iff <tt>A1 == A2</tt>.
248 * @param lhs aligned_allocator to compare.
249 * @param rhs aligned_allocator to compare.
250 * @return true if the allocators have the same alignment.
251 */
252 template <class T1, size_t A1, class T2, size_t A2>
253 inline bool operator==(const aligned_allocator<T1, A1>& lhs,
254 const aligned_allocator<T2, A2>& rhs) noexcept
255 {
256 return lhs.alignment == rhs.alignment;
257 }
258
259 /**
260 * @ingroup allocator_comparison
261 * Compares two aligned memory allocator for inequality. Since allocators
262 * are stateless, return \c true iff <tt>A1 != A2</tt>.
263 * @param lhs aligned_allocator to compare.
264 * @param rhs aligned_allocator to compare.
265 * @return true if the allocators have different alignments.
266 */
267 template <class T1, size_t A1, class T2, size_t A2>
268 inline bool operator!=(const aligned_allocator<T1, A1>& lhs,
269 const aligned_allocator<T2, A2>& rhs) noexcept
270 {
271 return !(lhs == rhs);
272 }
273
274 /****************************************
275 * aligned malloc / free implementation *
276 ****************************************/
277
278 namespace detail
279 {
280 inline void* xaligned_malloc(size_t size, size_t alignment)
281 {
282 assert(((alignment & (alignment - 1)) == 0) && "alignment must be a power of two");
283 assert((alignment >= sizeof(void*)) && "alignment must be at least the size of a pointer");
284 void* res = nullptr;
285#ifdef _WIN32
286 res = _aligned_malloc(size, alignment);
287#else
288 if (posix_memalign(memptr: &res, alignment, size) != 0)
289 {
290 res = nullptr;
291 }
292#endif
293 return res;
294 }
295
296 inline void xaligned_free(void* ptr)
297 {
298#ifdef _WIN32
299 _aligned_free(ptr);
300#else
301 free(ptr);
302#endif
303 }
304 }
305
306 inline void* aligned_malloc(size_t size, size_t alignment)
307 {
308 return detail::xaligned_malloc(size, alignment);
309 }
310
311 inline void aligned_free(void* ptr)
312 {
313 detail::xaligned_free(ptr);
314 }
315
316 template <class T>
317 inline size_t get_alignment_offset(const T* p, size_t size, size_t block_size)
318 {
319 // size_t block_size = simd_traits<T>::size;
320 if (block_size == 1)
321 {
322 // The simd_block consists of exactly one scalar so that all
323 // elements of the array
324 // are "well" aligned.
325 return 0;
326 }
327 else if (size_t(p) & (sizeof(T) - 1))
328 {
329 // The array is not aligned to the size of a single element, so that
330 // no element
331 // of the array is well aligned
332 return size;
333 }
334 else
335 {
336 size_t block_mask = block_size - 1;
337 return std::min<size_t>(
338 a: (block_size - ((size_t(p) / sizeof(T)) & block_mask)) & block_mask,
339 b: size);
340 }
341 }
342
343 template <class T, class A = default_arch>
344 using default_allocator = typename std::conditional<A::requires_alignment(),
345 aligned_allocator<T, A::alignment()>,
346 std::allocator<T>>::type;
347}
348
349#endif
350