/* ----------------------------------------------------------------------------
Copyright (c) 2018,2019 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license.
-----------------------------------------------------------------------------*/

/* This is a stress test for the allocator, using multiple threads and
   transferring objects between threads. This is not a typical workload
   but uses a random linear size distribution. Timing can also depend on
   (random) thread scheduling. Do not use this test as a benchmark!
*/
12
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <mimalloc.h>
19
// Command line:
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
//
// argument defaults (each can be overridden positionally, see main)
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 50; // scaling factor: multiplies the per-thread allocation count
static int ITER = 10; // N full iterations re-creating all threads

// static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int SCALE = 100; // scaling factor

static bool allow_large_objects = true; // allow very large objects? (see alloc_items)
static size_t use_one_size = 0; // use single object size of N uintptr_t? (0 = random sizes)
32
33
// Route allocation calls either to the standard C allocator or to mimalloc,
// so the same stress code can be built against both for comparison.
#ifdef USE_STD_MALLOC
#define custom_malloc(s) malloc(s)
#define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p)
#else
#define custom_malloc(s) mi_malloc(s)
#define custom_realloc(p,s) mi_realloc(p,s)
#define custom_free(p) mi_free(p)
#endif
43
// Transfer buffer: threads randomly exchange locally allocated blocks through
// these shared slots, so blocks are frequently freed by a different thread
// than the one that allocated them (the cross-thread part of the stress).
#define TRANSFERS (1000)
static volatile void* transfer[TRANSFERS];


// XOR mask applied to every word written into an allocated block;
// free_items uses it to detect memory corruption (see alloc_items/free_items).
#if (UINTPTR_MAX != UINT32_MAX)
const uintptr_t cookie = 0xbf58476d1ce4e5b9UL;
#else
const uintptr_t cookie = 0x1ce4e5b9UL;
#endif

// Implemented per-platform at the bottom of this file
// (Win32 Interlocked ops vs. C11 <stdatomic.h>).
static void* atomic_exchange_ptr(volatile void** p, void* newval);
56
// The "random state" is simply a pointer to a uintptr_t seed that is
// advanced in place on every call to `pick`.
typedef uintptr_t* random_t;

// Advance the random state and return the new pseudo-random value.
// Each step is an invertible integer mixing function (a bijection),
// chosen per word size.
static uintptr_t pick(random_t r) {
  uintptr_t s = *r;
#if (UINTPTR_MAX > UINT32_MAX)
  // splitmix64 finalizer by Sebastiano Vigna,
  // see: <http://xoshiro.di.unimi.it/splitmix64.c>
  s = (s ^ (s >> 30)) * 0xbf58476d1ce4e5b9UL;
  s = (s ^ (s >> 27)) * 0x94d049bb133111ebUL;
  s ^= (s >> 31);
#else
  // 32-bit mixer by Chris Wellons,
  // see: <https://nullprogram.com/blog/2018/07/31/>
  s = (s ^ (s >> 16)) * 0x7feb352dUL;
  s = (s ^ (s >> 15)) * 0x846ca68bUL;
  s ^= (s >> 16);
#endif
  *r = s;
  return s;
}
79
80static bool chance(size_t perc, random_t r) {
81 return (pick(r) % 100 <= perc);
82}
83
// Allocate an array of `items` uintptr_t words and fill it with a
// cookie-masked descending counter (p[i] = (items-i)^cookie), so p[0]
// encodes the item count and free_items can later verify the whole block
// for corruption. Returns NULL if the allocation fails (caller tolerates it).
static void* alloc_items(size_t items, random_t r) {
  if (chance(1, r)) {
    // rarely, scale the request up to exercise large/huge object paths
    if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant
    else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge
    else items *= 100; // 1% large objects;
  }
  if (items == 40) items++; // pthreads uses that size for stack increases
  if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
  uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t));
  if (p != NULL) {
    for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
  }
  return p;
}
98
99static void free_items(void* p) {
100 if (p != NULL) {
101 uintptr_t* q = (uintptr_t*)p;
102 uintptr_t items = (q[0] ^ cookie);
103 for (uintptr_t i = 0; i < items; i++) {
104 if ((q[i] ^ cookie) != items - i) {
105 fprintf(stderr, "memory corruption at block %p at %zu\n", p, i);
106 abort();
107 }
108 }
109 }
110 custom_free(p);
111}
112
113
114static void stress(intptr_t tid) {
115 //bench_start_thread();
116 uintptr_t r = tid * 43;
117 const size_t max_item_shift = 5; // 128
118 const size_t max_item_retained_shift = max_item_shift + 2;
119 size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more
120 size_t retain = allocs / 2;
121 void** data = NULL;
122 size_t data_size = 0;
123 size_t data_top = 0;
124 void** retained = (void**)custom_malloc(retain * sizeof(void*));
125 size_t retain_top = 0;
126
127 while (allocs > 0 || retain > 0) {
128 if (retain == 0 || (chance(50, &r) && allocs > 0)) {
129 // 50%+ alloc
130 allocs--;
131 if (data_top >= data_size) {
132 data_size += 100000;
133 data = (void**)custom_realloc(data, data_size * sizeof(void*));
134 }
135 data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r);
136 }
137 else {
138 // 25% retain
139 retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r);
140 retain--;
141 }
142 if (chance(66, &r) && data_top > 0) {
143 // 66% free previous alloc
144 size_t idx = pick(&r) % data_top;
145 free_items(data[idx]);
146 data[idx] = NULL;
147 }
148 if (chance(25, &r) && data_top > 0) {
149 // 25% exchange a local pointer with the (shared) transfer buffer.
150 size_t data_idx = pick(&r) % data_top;
151 size_t transfer_idx = pick(&r) % TRANSFERS;
152 void* p = data[data_idx];
153 void* q = atomic_exchange_ptr(&transfer[transfer_idx], p);
154 data[data_idx] = q;
155 }
156 }
157 // free everything that is left
158 for (size_t i = 0; i < retain_top; i++) {
159 free_items(retained[i]);
160 }
161 for (size_t i = 0; i < data_top; i++) {
162 free_items(data[i]);
163 }
164 custom_free(retained);
165 custom_free(data);
166 //bench_end_thread();
167}
168
169static void run_os_threads(size_t nthreads);
170
171int main(int argc, char** argv) {
172 // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
173 if (argc >= 2) {
174 char* end;
175 long n = strtol(argv[1], &end, 10);
176 if (n > 0) THREADS = n;
177 }
178 if (argc >= 3) {
179 char* end;
180 long n = (strtol(argv[2], &end, 10));
181 if (n > 0) SCALE = n;
182 }
183 if (argc >= 4) {
184 char* end;
185 long n = (strtol(argv[3], &end, 10));
186 if (n > 0) ITER = n;
187 }
188 printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
189 //int res = mi_reserve_huge_os_pages(4,1);
190 //printf("(reserve huge: %i\n)", res);
191
192 //bench_start_program();
193
194 // Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
195 mi_stats_reset();
196 uintptr_t r = 43 * 43;
197 for (int n = 0; n < ITER; n++) {
198 run_os_threads(THREADS);
199 for (int i = 0; i < TRANSFERS; i++) {
200 if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
201 void* p = atomic_exchange_ptr(&transfer[i], NULL);
202 free_items(p);
203 }
204 }
205 mi_collect(false);
206#ifndef NDEBUG
207 if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); }
208#endif
209 }
210
211 mi_collect(true);
212 mi_stats_print(NULL);
213 //bench_end_program();
214 return 0;
215}
216
217
218#ifdef _WIN32
219
220#include <windows.h>
221
// Win32 thread entry point: the thread index is passed through the LPVOID
// parameter and used as the thread id for `stress`.
static DWORD WINAPI thread_entry(LPVOID param) {
  stress((intptr_t)param);
  return 0;
}
226
227static void run_os_threads(size_t nthreads) {
228 DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD));
229 HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE));
230 for (uintptr_t i = 0; i < nthreads; i++) {
231 thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]);
232 }
233 for (size_t i = 0; i < nthreads; i++) {
234 WaitForSingleObject(thandles[i], INFINITE);
235 }
236 for (size_t i = 0; i < nthreads; i++) {
237 CloseHandle(thandles[i]);
238 }
239 custom_free(tids);
240 custom_free(thandles);
241}
242
243static void* atomic_exchange_ptr(volatile void** p, void* newval) {
244#if (INTPTR_MAX == UINT32_MAX)
245 return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
246#else
247 return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
248#endif
249}
250#else
251
252#include <pthread.h>
253#include <stdatomic.h>
254
// pthread entry point: the thread index travels through the void* argument
// (converted back via uintptr_t) and becomes the thread id for `stress`.
static void* thread_entry(void* param) {
  stress((uintptr_t)param);
  return NULL;
}
259
260static void run_os_threads(size_t nthreads) {
261 pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t));
262 memset(threads, 0, sizeof(pthread_t) * nthreads);
263 //pthread_setconcurrency(nthreads);
264 for (uintptr_t i = 0; i < nthreads; i++) {
265 pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
266 }
267 for (size_t i = 0; i < nthreads; i++) {
268 pthread_join(threads[i], NULL);
269 }
270 custom_free(threads);
271}
272
// Atomically swap *p with newval and return the previous value (C11).
// Fix vs. original: memory_order_acquire orders only the load half of the
// exchange — the store half was effectively relaxed, so the contents of a
// block published into the transfer buffer were not release-ordered for the
// thread that later takes it out. Use acq_rel: release publishes the block's
// writes; acquire makes the previous owner's writes visible to us.
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
  return atomic_exchange_explicit((volatile _Atomic(void*)*)p, newval, memory_order_acq_rel);
}
276
277#endif
278