1 | /* ---------------------------------------------------------------------------- |
2 | Copyright (c) 2018,2019 Microsoft Research, Daan Leijen |
3 | This is free software; you can redistribute it and/or modify it under the |
4 | terms of the MIT license. |
5 | -----------------------------------------------------------------------------*/ |
6 | |
7 | /* This is a stress test for the allocator, using multiple threads and |
8 | transferring objects between threads. This is not a typical workload |
9 | but uses a random linear size distribution. Timing can also depend on |
10 | (random) thread scheduling. Do not use this test as a benchmark! |
11 | */ |
12 | |
13 | #include <stdio.h> |
14 | #include <stdlib.h> |
15 | #include <stdint.h> |
16 | #include <stdbool.h> |
17 | #include <string.h> |
18 | #include <mimalloc.h> |
19 | |
20 | // > mimalloc-test-stress [THREADS] [SCALE] [ITER] |
21 | // |
22 | // argument defaults |
23 | static int THREADS = 32; // more repeatable if THREADS <= #processors |
24 | static int SCALE = 50; // scaling factor |
25 | static int ITER = 10; // N full iterations re-creating all threads |
26 | |
27 | // static int THREADS = 8; // more repeatable if THREADS <= #processors |
28 | // static int SCALE = 100; // scaling factor |
29 | |
30 | static bool allow_large_objects = true; // allow very large objects? |
31 | static size_t use_one_size = 0; // use single object size of N uintptr_t? |
32 | |
33 | |
34 | #ifdef USE_STD_MALLOC |
35 | #define custom_malloc(s) malloc(s) |
36 | #define custom_realloc(p,s) realloc(p,s) |
37 | #define custom_free(p) free(p) |
38 | #else |
39 | #define custom_malloc(s) mi_malloc(s) |
40 | #define custom_realloc(p,s) mi_realloc(p,s) |
41 | #define custom_free(p) mi_free(p) |
42 | #endif |
43 | |
44 | // transfer pointer between threads |
45 | #define TRANSFERS (1000) |
46 | static volatile void* transfer[TRANSFERS]; |
47 | |
48 | |
49 | #if (UINTPTR_MAX != UINT32_MAX) |
50 | const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; |
51 | #else |
52 | const uintptr_t cookie = 0x1ce4e5b9UL; |
53 | #endif |
54 | |
55 | static void* atomic_exchange_ptr(volatile void** p, void* newval); |
56 | |
// A "random" state is just a pointer to the current seed word.
typedef uintptr_t* random_t;

// Advance the pseudo-random state `*r` and return the new value.
// 64-bit: splitmix64 finalizer; 32-bit: Chris Wellons' 32-bit hash.
static uintptr_t pick(random_t r) {
  uintptr_t s = *r;
#if (UINTPTR_MAX > UINT32_MAX)
  // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
  s = (s ^ (s >> 30)) * 0xbf58476d1ce4e5b9UL;
  s = (s ^ (s >> 27)) * 0x94d049bb133111ebUL;
  s = (s ^ (s >> 31));
#else
  // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
  s = (s ^ (s >> 16)) * 0x7feb352dUL;
  s = (s ^ (s >> 15)) * 0x846ca68bUL;
  s = (s ^ (s >> 16));
#endif
  *r = s;
  return s;
}
79 | |
80 | static bool chance(size_t perc, random_t r) { |
81 | return (pick(r) % 100 <= perc); |
82 | } |
83 | |
84 | static void* alloc_items(size_t items, random_t r) { |
85 | if (chance(1, r)) { |
86 | if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant |
87 | else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge |
88 | else items *= 100; // 1% large objects; |
89 | } |
90 | if (items == 40) items++; // pthreads uses that size for stack increases |
91 | if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t)); |
92 | uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t)); |
93 | if (p != NULL) { |
94 | for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; |
95 | } |
96 | return p; |
97 | } |
98 | |
99 | static void free_items(void* p) { |
100 | if (p != NULL) { |
101 | uintptr_t* q = (uintptr_t*)p; |
102 | uintptr_t items = (q[0] ^ cookie); |
103 | for (uintptr_t i = 0; i < items; i++) { |
104 | if ((q[i] ^ cookie) != items - i) { |
105 | fprintf(stderr, "memory corruption at block %p at %zu\n" , p, i); |
106 | abort(); |
107 | } |
108 | } |
109 | } |
110 | custom_free(p); |
111 | } |
112 | |
113 | |
114 | static void stress(intptr_t tid) { |
115 | //bench_start_thread(); |
116 | uintptr_t r = tid * 43; |
117 | const size_t max_item_shift = 5; // 128 |
118 | const size_t max_item_retained_shift = max_item_shift + 2; |
119 | size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more |
120 | size_t retain = allocs / 2; |
121 | void** data = NULL; |
122 | size_t data_size = 0; |
123 | size_t data_top = 0; |
124 | void** retained = (void**)custom_malloc(retain * sizeof(void*)); |
125 | size_t retain_top = 0; |
126 | |
127 | while (allocs > 0 || retain > 0) { |
128 | if (retain == 0 || (chance(50, &r) && allocs > 0)) { |
129 | // 50%+ alloc |
130 | allocs--; |
131 | if (data_top >= data_size) { |
132 | data_size += 100000; |
133 | data = (void**)custom_realloc(data, data_size * sizeof(void*)); |
134 | } |
135 | data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r); |
136 | } |
137 | else { |
138 | // 25% retain |
139 | retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r); |
140 | retain--; |
141 | } |
142 | if (chance(66, &r) && data_top > 0) { |
143 | // 66% free previous alloc |
144 | size_t idx = pick(&r) % data_top; |
145 | free_items(data[idx]); |
146 | data[idx] = NULL; |
147 | } |
148 | if (chance(25, &r) && data_top > 0) { |
149 | // 25% exchange a local pointer with the (shared) transfer buffer. |
150 | size_t data_idx = pick(&r) % data_top; |
151 | size_t transfer_idx = pick(&r) % TRANSFERS; |
152 | void* p = data[data_idx]; |
153 | void* q = atomic_exchange_ptr(&transfer[transfer_idx], p); |
154 | data[data_idx] = q; |
155 | } |
156 | } |
157 | // free everything that is left |
158 | for (size_t i = 0; i < retain_top; i++) { |
159 | free_items(retained[i]); |
160 | } |
161 | for (size_t i = 0; i < data_top; i++) { |
162 | free_items(data[i]); |
163 | } |
164 | custom_free(retained); |
165 | custom_free(data); |
166 | //bench_end_thread(); |
167 | } |
168 | |
169 | static void run_os_threads(size_t nthreads); |
170 | |
171 | int main(int argc, char** argv) { |
172 | // > mimalloc-test-stress [THREADS] [SCALE] [ITER] |
173 | if (argc >= 2) { |
174 | char* end; |
175 | long n = strtol(argv[1], &end, 10); |
176 | if (n > 0) THREADS = n; |
177 | } |
178 | if (argc >= 3) { |
179 | char* end; |
180 | long n = (strtol(argv[2], &end, 10)); |
181 | if (n > 0) SCALE = n; |
182 | } |
183 | if (argc >= 4) { |
184 | char* end; |
185 | long n = (strtol(argv[3], &end, 10)); |
186 | if (n > 0) ITER = n; |
187 | } |
188 | printf("start with %d threads with a %d%% load-per-thread and %d iterations\n" , THREADS, SCALE, ITER); |
189 | //int res = mi_reserve_huge_os_pages(4,1); |
190 | //printf("(reserve huge: %i\n)", res); |
191 | |
192 | //bench_start_program(); |
193 | |
194 | // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. |
195 | mi_stats_reset(); |
196 | uintptr_t r = 43 * 43; |
197 | for (int n = 0; n < ITER; n++) { |
198 | run_os_threads(THREADS); |
199 | for (int i = 0; i < TRANSFERS; i++) { |
200 | if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers |
201 | void* p = atomic_exchange_ptr(&transfer[i], NULL); |
202 | free_items(p); |
203 | } |
204 | } |
205 | mi_collect(false); |
206 | #ifndef NDEBUG |
207 | if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n" , n + 1); } |
208 | #endif |
209 | } |
210 | |
211 | mi_collect(true); |
212 | mi_stats_print(NULL); |
213 | //bench_end_program(); |
214 | return 0; |
215 | } |
216 | |
217 | |
218 | #ifdef _WIN32 |
219 | |
220 | #include <windows.h> |
221 | |
222 | static DWORD WINAPI thread_entry(LPVOID param) { |
223 | stress((intptr_t)param); |
224 | return 0; |
225 | } |
226 | |
227 | static void run_os_threads(size_t nthreads) { |
228 | DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD)); |
229 | HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE)); |
230 | for (uintptr_t i = 0; i < nthreads; i++) { |
231 | thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); |
232 | } |
233 | for (size_t i = 0; i < nthreads; i++) { |
234 | WaitForSingleObject(thandles[i], INFINITE); |
235 | } |
236 | for (size_t i = 0; i < nthreads; i++) { |
237 | CloseHandle(thandles[i]); |
238 | } |
239 | custom_free(tids); |
240 | custom_free(thandles); |
241 | } |
242 | |
243 | static void* atomic_exchange_ptr(volatile void** p, void* newval) { |
244 | #if (INTPTR_MAX == UINT32_MAX) |
245 | return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval); |
246 | #else |
247 | return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval); |
248 | #endif |
249 | } |
250 | #else |
251 | |
252 | #include <pthread.h> |
253 | #include <stdatomic.h> |
254 | |
255 | static void* thread_entry(void* param) { |
256 | stress((uintptr_t)param); |
257 | return NULL; |
258 | } |
259 | |
260 | static void run_os_threads(size_t nthreads) { |
261 | pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t)); |
262 | memset(threads, 0, sizeof(pthread_t) * nthreads); |
263 | //pthread_setconcurrency(nthreads); |
264 | for (uintptr_t i = 0; i < nthreads; i++) { |
265 | pthread_create(&threads[i], NULL, &thread_entry, (void*)i); |
266 | } |
267 | for (size_t i = 0; i < nthreads; i++) { |
268 | pthread_join(threads[i], NULL); |
269 | } |
270 | custom_free(threads); |
271 | } |
272 | |
// Atomically swap *p with newval and return the previous value (C11 atomics).
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
  volatile _Atomic(void*)* slot = (volatile _Atomic(void*)*)p;
  return atomic_exchange_explicit(slot, newval, memory_order_acquire);
}
276 | |
277 | #endif |
278 | |