memcpySmall.h source code [ClickHouse/dbms/src/Common/memcpySmall.h]

1	#pragma once
2
3	#include <string.h>
4
5	#ifdef __SSE2__
6	#include <emmintrin.h>
7
8
/** The memcpy function can perform suboptimally if all of the following conditions are met:
  * 1. The size of the memory region is relatively small (approximately, under 50 bytes).
  * 2. The size of the memory region is not known at compile time.
  *
  * In that case, memcpy performs suboptimally for the following reasons:
  * 1. The function is not inlined.
  * 2. Much time (and many instructions) is spent processing the "tails" of the data.
16	*
  * There are cases when the function can be implemented in a more optimal way with the help of some assumptions.
  * One such assumption is the ability to read and write some number of bytes past the end of the passed memory regions.
  * Under that assumption, it is possible to avoid complicated code for processing the tails of the data and to always copy in big chunks.
20	*
  * This case is typical, for example, when many small pieces of data are gathered into a single contiguous piece of memory in a loop,
  * because each subsequent copy overwrites the excess data written past the end of the previous copy.
23	*
  * The assumption that the memory region is small enough allows us not to unroll the loop.
  * This is slower when the memory region is actually big.
26	*
27	* Use with caution.
28	*/
29
namespace detail
{
    /** Copies at least n bytes from src to dst using unaligned 16-byte SSE2 loads and stores.
      *
      * The copy always proceeds in whole 16-byte chunks, so the size is effectively rounded up
      * to a multiple of 16: up to 15 bytes past the end of the 'src' region are read and up to
      * 15 bytes past the end of the 'dst' region are overwritten. The caller must guarantee
      * that both accesses are valid. The pointers are declared __restrict, so the regions
      * must not overlap.
      *
      * n is signed on purpose: 16 is subtracted on every iteration, so the counter drops
      * to zero or below once the last (possibly partial) chunk has been copied.
      * Nothing is copied when n <= 0.
      */
    inline void memcpySmallAllowReadWriteOverflow15Impl(char * __restrict dst, const char * __restrict src, ssize_t n)
    {
        while (n > 0)
        {
            /// Unaligned variants are used: neither pointer is required to be 16-byte aligned.
            _mm_storeu_si128(reinterpret_cast<__m128i *>(dst),
                _mm_loadu_si128(reinterpret_cast<const __m128i *>(src)));

            dst += 16;
            src += 16;
            n -= 16;
        }
    }
}
45
46	/* Works under assumption, that it's possible to read up to 15 excessive bytes after end of 'src' region*
47	* and to write any garbage into up to 15 bytes after end of 'dst' region.
48	*/
49	inline void memcpySmallAllowReadWriteOverflow15(void * __restrict dst, const void * __restrict src, size_t n)
50	{
51	detail::memcpySmallAllowReadWriteOverflow15Impl(reinterpret_cast<char >(dst), reinterpret_cast<const* char *>(src), n);
52	}
53
/** NOTE There was also a function that assumed you could read any bytes inside the same memory page as src.
  * That function was unused, and it also required special handling for Valgrind and ASan.
  */
57
58	#else /// Implementation for other platforms.
59
/** Portable variant for targets without SSE2.
  * Simply forwards to the standard memcpy, which copies exactly n bytes from src to dst.
  */
inline void memcpySmallAllowReadWriteOverflow15(void * __restrict dst, const void * __restrict src, size_t n)
{
    ::memcpy(dst, src, n);
}
64
65	#endif
66

Browse the source code of ClickHouse/dbms/src/Common/memcpySmall.h