memory.h source code [arrow/arrow/util/memory.h]

1	// Licensed to the Apache Software Foundation (ASF) under one
2	// or more contributor license agreements. See the NOTICE file
3	// distributed with this work for additional information
4	// regarding copyright ownership. The ASF licenses this file
5	// to you under the Apache License, Version 2.0 (the
6	// "License"); you may not use this file except in compliance
7	// with the License. You may obtain a copy of the License at
8	//
9	// http://www.apache.org/licenses/LICENSE-2.0
10	//
11	// Unless required by applicable law or agreed to in writing,
12	// software distributed under the License is distributed on an
13	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14	// KIND, either express or implied. See the License for the
15	// specific language governing permissions and limitations
16	// under the License.
17
18	#ifndef ARROW_UTIL_MEMORY_H
19	#define ARROW_UTIL_MEMORY_H
20
21	#include <thread>
22	#include <vector>
23
24	#include "arrow/util/thread-pool.h"
25
26	namespace arrow {
27	namespace internal {
28
29	uint8_t* pointer_logical_and(const uint8_t* address, uintptr_t bits) {
30	uintptr_t value = reinterpret_cast<uintptr_t>(address);
31	return reinterpret_cast<uint8_t*>(value & bits);
32	}
33
34	// This function is just for avoiding MinGW-w64 32bit crash.
35	// See also: https://sourceforge.net/p/mingw-w64/bugs/767/
36	void* wrap_memcpy(void* dst, const void* src, size_t n) { return memcpy(dst, src, n); }
37
38	// A helper function for doing memcpy with multiple threads. This is required
39	// to saturate the memory bandwidth of modern cpus.
40	void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
41	uintptr_t block_size, int num_threads) {
42	// XXX This function is really using `num_threads + 1` threads.
43	auto pool = GetCpuThreadPool();
44
45	uint8_t* left = pointer_logical_and(src + block_size - `1`, ~(block_size - `1`));
46	uint8_t* right = pointer_logical_and(src + nbytes, ~(block_size - `1`));
47	int64_t num_blocks = (right - left) / block_size;
48
49	// Update right address
50	right = right - (num_blocks % num_threads) * block_size;
51
52	// Now we divide these blocks between available threads. The remainder is
53	// handled separately.
54	size_t chunk_size = (right - left) / num_threads;
55	int64_t prefix = left - src;
56	int64_t suffix = src + nbytes - right;
57	// Now the data layout is \| prefix \| k num_threads * block_size \| suffix \|.*
58	// We have chunk_size = k block_size, therefore the data layout is*
59	// \| prefix \| num_threads chunk_size \| suffix \|.*
60	// Each thread gets a "chunk" of k blocks.
61
62	// Start all parallel memcpy tasks and handle leftovers while threads run.
63	std::vector<std::future<void*>> futures;
64
65	for (int i = `0`; i < num_threads; i++) {
66	futures.emplace_back(pool->Submit(wrap_memcpy, dst + prefix + i * chunk_size,
67	left + i * chunk_size, chunk_size));
68	}
69	memcpy(dst, src, prefix);
70	memcpy(dst + prefix + num_threads * chunk_size, right, suffix);
71
72	for (auto& fut : futures) {
73	fut.get();
74	}
75	}
76
77	} // namespace internal
78	} // namespace arrow
79
80	#endif // ARROW_UTIL_MEMORY_H
81

Browse the source code of arrow/arrow/util/memory.h