1 | /* Copyright (c) 2014, Google Inc. |
2 | * |
3 | * Permission to use, copy, modify, and/or distribute this software for any |
4 | * purpose with or without fee is hereby granted, provided that the above |
5 | * copyright notice and this permission notice appear in all copies. |
6 | * |
7 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
8 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
9 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
10 | * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
11 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
12 | * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
13 | * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ |
14 | |
15 | #include <openssl/rand.h> |
16 | |
17 | #include <assert.h> |
18 | #include <limits.h> |
19 | #include <string.h> |
20 | |
21 | #if defined(BORINGSSL_FIPS) |
22 | #include <unistd.h> |
23 | #endif |
24 | |
25 | #include <openssl/chacha.h> |
26 | #include <openssl/cpu.h> |
27 | #include <openssl/mem.h> |
28 | |
29 | #include "internal.h" |
30 | #include "../../internal.h" |
31 | #include "../delocate.h" |
32 | |
33 | |
34 | // It's assumed that the operating system always has an unfailing source of |
35 | // entropy which is accessed via |CRYPTO_sysrand|. (If the operating system |
36 | // entropy source fails, it's up to |CRYPTO_sysrand| to abort the process—we |
37 | // don't try to handle it.) |
38 | // |
39 | // In addition, the hardware may provide a low-latency RNG. Intel's rdrand |
40 | // instruction is the canonical example of this. When a hardware RNG is |
41 | // available we don't need to worry about an RNG failure arising from fork()ing |
42 | // the process or moving a VM, so we can keep thread-local RNG state and use it |
43 | // as an additional-data input to CTR-DRBG. |
44 | // |
// (We assume that the OS entropy is safe from fork()ing and VM duplication.
// This might be a bit of a leap of faith, especially on Windows, but there is
// nothing that we can do about it.)
48 | |
// kReseedInterval is the number of generate calls made to CTR-DRBG before
// reseeding. The per-thread |calls| counter is compared against this value to
// decide when fresh seed material must be fetched.
static const unsigned kReseedInterval = 4096;

// CRNGT_BLOCK_SIZE is the number of bytes in a “block” for the purposes of the
// continuous random number generator test in FIPS 140-2, section 4.9.2. It is
// also the size of the |last_block| buffer kept in each thread's state.
#define CRNGT_BLOCK_SIZE 16
56 | |
57 | // rand_thread_state contains the per-thread state for the RNG. |
58 | struct rand_thread_state { |
59 | CTR_DRBG_STATE drbg; |
60 | // calls is the number of generate calls made on |drbg| since it was last |
61 | // (re)seeded. This is bound by |kReseedInterval|. |
62 | unsigned calls; |
63 | // last_block_valid is non-zero iff |last_block| contains data from |
64 | // |CRYPTO_sysrand|. |
65 | int last_block_valid; |
66 | |
67 | #if defined(BORINGSSL_FIPS) |
68 | // last_block contains the previous block from |CRYPTO_sysrand|. |
69 | uint8_t last_block[CRNGT_BLOCK_SIZE]; |
70 | // next and prev form a NULL-terminated, double-linked list of all states in |
71 | // a process. |
72 | struct rand_thread_state *next, *prev; |
73 | #endif |
74 | }; |
75 | |
76 | #if defined(BORINGSSL_FIPS) |
// thread_states_list is the head of a linked-list of all |rand_thread_state|
// objects in the process, one per thread. This is needed because FIPS requires
// that they be zeroed on process exit, but thread-local destructors aren't
// called when the whole process is exiting.
//
// thread_states_list_lock guards the list itself. It is also held (for
// reading) while a thread uses its DRBG, so that clearing all states at exit
// cannot race with output generation.
DEFINE_BSS_GET(struct rand_thread_state *, thread_states_list);
DEFINE_STATIC_MUTEX(thread_states_list_lock);
83 | |
84 | static void rand_thread_state_clear_all(void) __attribute__((destructor)); |
85 | static void rand_thread_state_clear_all(void) { |
86 | CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get()); |
87 | for (struct rand_thread_state *cur = *thread_states_list_bss_get(); |
88 | cur != NULL; cur = cur->next) { |
89 | CTR_DRBG_clear(&cur->drbg); |
90 | } |
91 | // |thread_states_list_lock is deliberately left locked so that any threads |
92 | // that are still running will hang if they try to call |RAND_bytes|. |
93 | } |
94 | #endif |
95 | |
// rand_thread_state_free releases a |rand_thread_state|. It is installed as
// the thread-local destructor for the state, so it runs when a thread exits.
// A NULL |state_in| is ignored.
static void rand_thread_state_free(void *state_in) {
  if (state_in == NULL) {
    return;
  }
  struct rand_thread_state *const dying = state_in;

#if defined(BORINGSSL_FIPS)
  CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get());

  // Unlink |dying| from the process-wide list. The pointer that currently
  // refers to it is either the previous node's |next| or the list head.
  struct rand_thread_state **const incoming_link =
      dying->prev == NULL ? thread_states_list_bss_get() : &dying->prev->next;
  *incoming_link = dying->next;
  if (dying->next != NULL) {
    dying->next->prev = dying->prev;
  }

  CRYPTO_STATIC_MUTEX_unlock_write(thread_states_list_lock_bss_get());

  // The state is no longer reachable from the list, so it can be wiped
  // without holding the lock.
  CTR_DRBG_clear(&dying->drbg);
#endif

  OPENSSL_free(dying);
}
125 | |
126 | #if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM) && \ |
127 | !defined(BORINGSSL_UNSAFE_DETERMINISTIC_MODE) |
// hwrand fills |buf| with |len| bytes from the hardware RNG (RDRAND). It
// returns one on success and zero if RDRAND is unavailable or any read fails,
// in which case the contents of |buf| must not be used.
static int hwrand(uint8_t *buf, const size_t len) {
  if (!have_rdrand()) {
    return 0;
  }

  // Split the request into a bulk part that is a multiple of eight bytes and
  // a tail of fewer than eight bytes.
  const size_t tail_len = len % 8;
  const size_t bulk_len = len - tail_len;

  if (!CRYPTO_rdrand_multiple8_buf(buf, bulk_len)) {
    return 0;
  }

  if (tail_len > 0) {
    assert(tail_len < 8);

    // Read one full 8-byte word and keep only as much as is still needed.
    uint8_t word[8];
    if (!CRYPTO_rdrand(word)) {
      return 0;
    }
    OPENSSL_memcpy(buf + bulk_len, word, tail_len);
  }

#if defined(BORINGSSL_FIPS_BREAK_CRNG)
  // This breaks the "continuous random number generator test" defined in FIPS
  // 140-2, section 4.9.2, and implemented in rand_get_seed().
  OPENSSL_memset(buf, 0, len);
#endif

  return 1;
}
157 | |
158 | #else |
159 | |
// hwrand is the fallback used when no hardware RNG support is compiled in. It
// never touches |buf| and always returns zero, so callers fall back to their
// non-hardware entropy path.
static int hwrand(uint8_t *buf, size_t len) {
  (void)buf;
  (void)len;
  return 0;
}
163 | |
164 | #endif |
165 | |
166 | #if defined(BORINGSSL_FIPS) |
167 | |
168 | static void rand_get_seed(struct rand_thread_state *state, |
169 | uint8_t seed[CTR_DRBG_ENTROPY_LEN]) { |
170 | if (!state->last_block_valid) { |
171 | if (!hwrand(state->last_block, sizeof(state->last_block))) { |
172 | CRYPTO_sysrand(state->last_block, sizeof(state->last_block)); |
173 | } |
174 | state->last_block_valid = 1; |
175 | } |
176 | |
177 | // We overread from /dev/urandom or RDRAND by a factor of 10 and XOR to |
178 | // whiten. |
179 | #define FIPS_OVERREAD 10 |
180 | uint8_t entropy[CTR_DRBG_ENTROPY_LEN * FIPS_OVERREAD]; |
181 | |
182 | if (!hwrand(entropy, sizeof(entropy))) { |
183 | CRYPTO_sysrand(entropy, sizeof(entropy)); |
184 | } |
185 | |
186 | // See FIPS 140-2, section 4.9.2. This is the “continuous random number |
187 | // generator test” which causes the program to randomly abort. Hopefully the |
188 | // rate of failure is small enough not to be a problem in practice. |
189 | if (CRYPTO_memcmp(state->last_block, entropy, CRNGT_BLOCK_SIZE) == 0) { |
190 | fprintf(stderr, "CRNGT failed.\n" ); |
191 | BORINGSSL_FIPS_abort(); |
192 | } |
193 | |
194 | for (size_t i = CRNGT_BLOCK_SIZE; i < sizeof(entropy); |
195 | i += CRNGT_BLOCK_SIZE) { |
196 | if (CRYPTO_memcmp(entropy + i - CRNGT_BLOCK_SIZE, entropy + i, |
197 | CRNGT_BLOCK_SIZE) == 0) { |
198 | fprintf(stderr, "CRNGT failed.\n" ); |
199 | BORINGSSL_FIPS_abort(); |
200 | } |
201 | } |
202 | OPENSSL_memcpy(state->last_block, |
203 | entropy + sizeof(entropy) - CRNGT_BLOCK_SIZE, |
204 | CRNGT_BLOCK_SIZE); |
205 | |
206 | OPENSSL_memcpy(seed, entropy, CTR_DRBG_ENTROPY_LEN); |
207 | |
208 | for (size_t i = 1; i < FIPS_OVERREAD; i++) { |
209 | for (size_t j = 0; j < CTR_DRBG_ENTROPY_LEN; j++) { |
210 | seed[j] ^= entropy[CTR_DRBG_ENTROPY_LEN * i + j]; |
211 | } |
212 | } |
213 | } |
214 | |
215 | #else |
216 | |
217 | static void rand_get_seed(struct rand_thread_state *state, |
218 | uint8_t seed[CTR_DRBG_ENTROPY_LEN]) { |
219 | // If not in FIPS mode, we don't overread from the system entropy source and |
220 | // we don't depend only on the hardware RDRAND. |
221 | CRYPTO_sysrand(seed, CTR_DRBG_ENTROPY_LEN); |
222 | } |
223 | |
224 | #endif |
225 | |
226 | void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len, |
227 | const uint8_t user_additional_data[32]) { |
228 | if (out_len == 0) { |
229 | return; |
230 | } |
231 | |
232 | // Additional data is mixed into every CTR-DRBG call to protect, as best we |
233 | // can, against forks & VM clones. We do not over-read this information and |
234 | // don't reseed with it so, from the point of view of FIPS, this doesn't |
235 | // provide “prediction resistance”. But, in practice, it does. |
236 | uint8_t additional_data[32]; |
237 | if (!hwrand(additional_data, sizeof(additional_data))) { |
238 | // Without a hardware RNG to save us from address-space duplication, the OS |
239 | // entropy is used. This can be expensive (one read per |RAND_bytes| call) |
240 | // and so can be disabled by applications that we have ensured don't fork |
241 | // and aren't at risk of VM cloning. |
242 | if (!rand_fork_unsafe_buffering_enabled()) { |
243 | CRYPTO_sysrand(additional_data, sizeof(additional_data)); |
244 | } else { |
245 | OPENSSL_memset(additional_data, 0, sizeof(additional_data)); |
246 | } |
247 | } |
248 | |
249 | for (size_t i = 0; i < sizeof(additional_data); i++) { |
250 | additional_data[i] ^= user_additional_data[i]; |
251 | } |
252 | |
253 | struct rand_thread_state stack_state; |
254 | struct rand_thread_state *state = |
255 | CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_RAND); |
256 | |
257 | if (state == NULL) { |
258 | state = OPENSSL_malloc(sizeof(struct rand_thread_state)); |
259 | if (state == NULL || |
260 | !CRYPTO_set_thread_local(OPENSSL_THREAD_LOCAL_RAND, state, |
261 | rand_thread_state_free)) { |
262 | // If the system is out of memory, use an ephemeral state on the |
263 | // stack. |
264 | state = &stack_state; |
265 | } |
266 | |
267 | state->last_block_valid = 0; |
268 | uint8_t seed[CTR_DRBG_ENTROPY_LEN]; |
269 | rand_get_seed(state, seed); |
270 | if (!CTR_DRBG_init(&state->drbg, seed, NULL, 0)) { |
271 | abort(); |
272 | } |
273 | state->calls = 0; |
274 | |
275 | #if defined(BORINGSSL_FIPS) |
276 | if (state != &stack_state) { |
277 | CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get()); |
278 | struct rand_thread_state **states_list = thread_states_list_bss_get(); |
279 | state->next = *states_list; |
280 | if (state->next != NULL) { |
281 | state->next->prev = state; |
282 | } |
283 | state->prev = NULL; |
284 | *states_list = state; |
285 | CRYPTO_STATIC_MUTEX_unlock_write(thread_states_list_lock_bss_get()); |
286 | } |
287 | #endif |
288 | } |
289 | |
290 | if (state->calls >= kReseedInterval) { |
291 | uint8_t seed[CTR_DRBG_ENTROPY_LEN]; |
292 | rand_get_seed(state, seed); |
293 | #if defined(BORINGSSL_FIPS) |
294 | // Take a read lock around accesses to |state->drbg|. This is needed to |
295 | // avoid returning bad entropy if we race with |
296 | // |rand_thread_state_clear_all|. |
297 | // |
298 | // This lock must be taken after any calls to |CRYPTO_sysrand| to avoid a |
299 | // bug on ppc64le. glibc may implement pthread locks by wrapping user code |
300 | // in a hardware transaction, but, on some older versions of glibc and the |
301 | // kernel, syscalls made with |syscall| did not abort the transaction. |
302 | CRYPTO_STATIC_MUTEX_lock_read(thread_states_list_lock_bss_get()); |
303 | #endif |
304 | if (!CTR_DRBG_reseed(&state->drbg, seed, NULL, 0)) { |
305 | abort(); |
306 | } |
307 | state->calls = 0; |
308 | } else { |
309 | #if defined(BORINGSSL_FIPS) |
310 | CRYPTO_STATIC_MUTEX_lock_read(thread_states_list_lock_bss_get()); |
311 | #endif |
312 | } |
313 | |
314 | int first_call = 1; |
315 | while (out_len > 0) { |
316 | size_t todo = out_len; |
317 | if (todo > CTR_DRBG_MAX_GENERATE_LENGTH) { |
318 | todo = CTR_DRBG_MAX_GENERATE_LENGTH; |
319 | } |
320 | |
321 | if (!CTR_DRBG_generate(&state->drbg, out, todo, additional_data, |
322 | first_call ? sizeof(additional_data) : 0)) { |
323 | abort(); |
324 | } |
325 | |
326 | out += todo; |
327 | out_len -= todo; |
328 | // Though we only check before entering the loop, this cannot add enough to |
329 | // overflow a |size_t|. |
330 | state->calls++; |
331 | first_call = 0; |
332 | } |
333 | |
334 | if (state == &stack_state) { |
335 | CTR_DRBG_clear(&state->drbg); |
336 | } |
337 | |
338 | #if defined(BORINGSSL_FIPS) |
339 | CRYPTO_STATIC_MUTEX_unlock_read(thread_states_list_lock_bss_get()); |
340 | #endif |
341 | } |
342 | |
// RAND_bytes fills |out| with |out_len| random bytes, supplying no
// caller-provided additional data. It always returns one.
int RAND_bytes(uint8_t *out, size_t out_len) {
  // An all-zero block stands in for "no additional data from the caller".
  static const uint8_t kNoExtraData[32] = {0};

  RAND_bytes_with_additional_data(out, out_len, kNoExtraData);
  return 1;
}
348 | |
// RAND_pseudo_bytes behaves exactly like |RAND_bytes|: it fills |buf| with
// |len| random bytes and forwards that function's return value.
int RAND_pseudo_bytes(uint8_t *buf, size_t len) {
  const int ret = RAND_bytes(buf, len);
  return ret;
}
352 | |