| 1 | /* Copyright (c) 2014, Google Inc. |
| 2 | * |
| 3 | * Permission to use, copy, modify, and/or distribute this software for any |
| 4 | * purpose with or without fee is hereby granted, provided that the above |
| 5 | * copyright notice and this permission notice appear in all copies. |
| 6 | * |
| 7 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| 8 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 9 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
| 10 | * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 11 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
| 12 | * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| 13 | * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ |
| 14 | |
| 15 | #include <openssl/rand.h> |
| 16 | |
| 17 | #include <assert.h> |
| 18 | #include <limits.h> |
| 19 | #include <string.h> |
| 20 | |
| 21 | #if defined(BORINGSSL_FIPS) |
| 22 | #include <unistd.h> |
| 23 | #endif |
| 24 | |
| 25 | #include <openssl/chacha.h> |
| 26 | #include <openssl/cpu.h> |
| 27 | #include <openssl/mem.h> |
| 28 | |
| 29 | #include "internal.h" |
| 30 | #include "../../internal.h" |
| 31 | #include "../delocate.h" |
| 32 | |
| 33 | |
| 34 | // It's assumed that the operating system always has an unfailing source of |
| 35 | // entropy which is accessed via |CRYPTO_sysrand|. (If the operating system |
| 36 | // entropy source fails, it's up to |CRYPTO_sysrand| to abort the process—we |
| 37 | // don't try to handle it.) |
| 38 | // |
| 39 | // In addition, the hardware may provide a low-latency RNG. Intel's rdrand |
| 40 | // instruction is the canonical example of this. When a hardware RNG is |
| 41 | // available we don't need to worry about an RNG failure arising from fork()ing |
| 42 | // the process or moving a VM, so we can keep thread-local RNG state and use it |
| 43 | // as an additional-data input to CTR-DRBG. |
| 44 | // |
// (We assume that the OS entropy source is safe across fork() and VM
// duplication. This may be a leap of faith, especially on Windows, but there
// is nothing that we can do about it.)
| 48 | |
// kReseedInterval is how many CTR-DRBG generate calls a thread's DRBG may
// serve before it is reseeded.
static const unsigned kReseedInterval = 4096u;

// CRNGT_BLOCK_SIZE is the size, in bytes, of one "block" as compared by the
// continuous random number generator test (FIPS 140-2, section 4.9.2).
#define CRNGT_BLOCK_SIZE 16
| 56 | |
| 57 | // rand_thread_state contains the per-thread state for the RNG. |
| 58 | struct rand_thread_state { |
| 59 | CTR_DRBG_STATE drbg; |
| 60 | // calls is the number of generate calls made on |drbg| since it was last |
| 61 | // (re)seeded. This is bound by |kReseedInterval|. |
| 62 | unsigned calls; |
| 63 | // last_block_valid is non-zero iff |last_block| contains data from |
| 64 | // |CRYPTO_sysrand|. |
| 65 | int last_block_valid; |
| 66 | |
| 67 | #if defined(BORINGSSL_FIPS) |
| 68 | // last_block contains the previous block from |CRYPTO_sysrand|. |
| 69 | uint8_t last_block[CRNGT_BLOCK_SIZE]; |
| 70 | // next and prev form a NULL-terminated, double-linked list of all states in |
| 71 | // a process. |
| 72 | struct rand_thread_state *next, *prev; |
| 73 | #endif |
| 74 | }; |
| 75 | |
#if defined(BORINGSSL_FIPS)
// thread_states_list points at the first entry of a linked list of the
// |rand_thread_state| objects in the process, one per thread. The list exists
// because FIPS requires these states to be zeroed when the process exits, and
// thread-local destructors are not run when the whole process is exiting.
DEFINE_BSS_GET(struct rand_thread_state *, thread_states_list);
DEFINE_STATIC_MUTEX(thread_states_list_lock);

// rand_thread_state_clear_all wipes the DRBG of every state on
// |thread_states_list|. It is registered as a destructor.
static void rand_thread_state_clear_all(void) __attribute__((destructor));
static void rand_thread_state_clear_all(void) {
  CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get());

  struct rand_thread_state *node = *thread_states_list_bss_get();
  while (node != NULL) {
    CTR_DRBG_clear(&node->drbg);
    node = node->next;
  }

  // |thread_states_list_lock| is deliberately left locked so that any threads
  // that are still running will hang if they try to call |RAND_bytes|.
}
#endif
| 95 | |
// rand_thread_state_free releases a |rand_thread_state|. It is registered as
// the destructor for the thread-local RNG state and so runs when a thread
// exits. Passing NULL is a no-op.
static void rand_thread_state_free(void *state_in) {
  if (state_in == NULL) {
    return;
  }

  struct rand_thread_state *const state = state_in;

#if defined(BORINGSSL_FIPS)
  // Unlink |state| from the process-wide list while holding the write lock.
  CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get());

  struct rand_thread_state *const before = state->prev;
  struct rand_thread_state *const after = state->next;

  if (before == NULL) {
    // |state| was the head of the list.
    *thread_states_list_bss_get() = after;
  } else {
    before->next = after;
  }

  if (after != NULL) {
    after->prev = before;
  }

  CRYPTO_STATIC_MUTEX_unlock_write(thread_states_list_lock_bss_get());

  // Wipe the DRBG before the memory is handed back to the allocator.
  CTR_DRBG_clear(&state->drbg);
#endif

  OPENSSL_free(state);
}
| 125 | |
#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM) && \
    !defined(BORINGSSL_UNSAFE_DETERMINISTIC_MODE)
// hwrand tries to fill |buf| with |len| bytes from the hardware RNG. It
// returns one on success and zero if RDRAND is unavailable or any RDRAND call
// reports failure.
static int hwrand(uint8_t *buf, const size_t len) {
  if (!have_rdrand()) {
    return 0;
  }

  // Split the request into a prefix whose length is a multiple of eight and a
  // tail of fewer than eight bytes.
  const size_t tail_len = len % 8;
  const size_t bulk_len = len - tail_len;

  if (!CRYPTO_rdrand_multiple8_buf(buf, bulk_len)) {
    return 0;
  }

  if (tail_len != 0) {
    assert(tail_len < 8);

    // Draw into an eight-byte scratch buffer and copy only the bytes that are
    // still needed.
    uint8_t scratch[8];
    if (!CRYPTO_rdrand(scratch)) {
      return 0;
    }
    OPENSSL_memcpy(buf + bulk_len, scratch, tail_len);
  }

#if defined(BORINGSSL_FIPS_BREAK_CRNG)
  // This breaks the "continuous random number generator test" defined in FIPS
  // 140-2, section 4.9.2, and implemented in rand_get_seed().
  OPENSSL_memset(buf, 0, len);
#endif

  return 1;
}

#else

// hwrand is the fallback used when the RDRAND path is compiled out. It leaves
// |buf| untouched and always returns zero.
static int hwrand(uint8_t *buf, size_t len) {
  (void)buf;
  (void)len;
  return 0;
}

#endif
| 165 | |
| 166 | #if defined(BORINGSSL_FIPS) |
| 167 | |
| 168 | static void rand_get_seed(struct rand_thread_state *state, |
| 169 | uint8_t seed[CTR_DRBG_ENTROPY_LEN]) { |
| 170 | if (!state->last_block_valid) { |
| 171 | if (!hwrand(state->last_block, sizeof(state->last_block))) { |
| 172 | CRYPTO_sysrand(state->last_block, sizeof(state->last_block)); |
| 173 | } |
| 174 | state->last_block_valid = 1; |
| 175 | } |
| 176 | |
| 177 | // We overread from /dev/urandom or RDRAND by a factor of 10 and XOR to |
| 178 | // whiten. |
| 179 | #define FIPS_OVERREAD 10 |
| 180 | uint8_t entropy[CTR_DRBG_ENTROPY_LEN * FIPS_OVERREAD]; |
| 181 | |
| 182 | if (!hwrand(entropy, sizeof(entropy))) { |
| 183 | CRYPTO_sysrand(entropy, sizeof(entropy)); |
| 184 | } |
| 185 | |
| 186 | // See FIPS 140-2, section 4.9.2. This is the “continuous random number |
| 187 | // generator test” which causes the program to randomly abort. Hopefully the |
| 188 | // rate of failure is small enough not to be a problem in practice. |
| 189 | if (CRYPTO_memcmp(state->last_block, entropy, CRNGT_BLOCK_SIZE) == 0) { |
| 190 | fprintf(stderr, "CRNGT failed.\n" ); |
| 191 | BORINGSSL_FIPS_abort(); |
| 192 | } |
| 193 | |
| 194 | for (size_t i = CRNGT_BLOCK_SIZE; i < sizeof(entropy); |
| 195 | i += CRNGT_BLOCK_SIZE) { |
| 196 | if (CRYPTO_memcmp(entropy + i - CRNGT_BLOCK_SIZE, entropy + i, |
| 197 | CRNGT_BLOCK_SIZE) == 0) { |
| 198 | fprintf(stderr, "CRNGT failed.\n" ); |
| 199 | BORINGSSL_FIPS_abort(); |
| 200 | } |
| 201 | } |
| 202 | OPENSSL_memcpy(state->last_block, |
| 203 | entropy + sizeof(entropy) - CRNGT_BLOCK_SIZE, |
| 204 | CRNGT_BLOCK_SIZE); |
| 205 | |
| 206 | OPENSSL_memcpy(seed, entropy, CTR_DRBG_ENTROPY_LEN); |
| 207 | |
| 208 | for (size_t i = 1; i < FIPS_OVERREAD; i++) { |
| 209 | for (size_t j = 0; j < CTR_DRBG_ENTROPY_LEN; j++) { |
| 210 | seed[j] ^= entropy[CTR_DRBG_ENTROPY_LEN * i + j]; |
| 211 | } |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | #else |
| 216 | |
| 217 | static void rand_get_seed(struct rand_thread_state *state, |
| 218 | uint8_t seed[CTR_DRBG_ENTROPY_LEN]) { |
| 219 | // If not in FIPS mode, we don't overread from the system entropy source and |
| 220 | // we don't depend only on the hardware RDRAND. |
| 221 | CRYPTO_sysrand(seed, CTR_DRBG_ENTROPY_LEN); |
| 222 | } |
| 223 | |
| 224 | #endif |
| 225 | |
| 226 | void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len, |
| 227 | const uint8_t user_additional_data[32]) { |
| 228 | if (out_len == 0) { |
| 229 | return; |
| 230 | } |
| 231 | |
| 232 | // Additional data is mixed into every CTR-DRBG call to protect, as best we |
| 233 | // can, against forks & VM clones. We do not over-read this information and |
| 234 | // don't reseed with it so, from the point of view of FIPS, this doesn't |
| 235 | // provide “prediction resistance”. But, in practice, it does. |
| 236 | uint8_t additional_data[32]; |
| 237 | if (!hwrand(additional_data, sizeof(additional_data))) { |
| 238 | // Without a hardware RNG to save us from address-space duplication, the OS |
| 239 | // entropy is used. This can be expensive (one read per |RAND_bytes| call) |
| 240 | // and so can be disabled by applications that we have ensured don't fork |
| 241 | // and aren't at risk of VM cloning. |
| 242 | if (!rand_fork_unsafe_buffering_enabled()) { |
| 243 | CRYPTO_sysrand(additional_data, sizeof(additional_data)); |
| 244 | } else { |
| 245 | OPENSSL_memset(additional_data, 0, sizeof(additional_data)); |
| 246 | } |
| 247 | } |
| 248 | |
| 249 | for (size_t i = 0; i < sizeof(additional_data); i++) { |
| 250 | additional_data[i] ^= user_additional_data[i]; |
| 251 | } |
| 252 | |
| 253 | struct rand_thread_state stack_state; |
| 254 | struct rand_thread_state *state = |
| 255 | CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_RAND); |
| 256 | |
| 257 | if (state == NULL) { |
| 258 | state = OPENSSL_malloc(sizeof(struct rand_thread_state)); |
| 259 | if (state == NULL || |
| 260 | !CRYPTO_set_thread_local(OPENSSL_THREAD_LOCAL_RAND, state, |
| 261 | rand_thread_state_free)) { |
| 262 | // If the system is out of memory, use an ephemeral state on the |
| 263 | // stack. |
| 264 | state = &stack_state; |
| 265 | } |
| 266 | |
| 267 | state->last_block_valid = 0; |
| 268 | uint8_t seed[CTR_DRBG_ENTROPY_LEN]; |
| 269 | rand_get_seed(state, seed); |
| 270 | if (!CTR_DRBG_init(&state->drbg, seed, NULL, 0)) { |
| 271 | abort(); |
| 272 | } |
| 273 | state->calls = 0; |
| 274 | |
| 275 | #if defined(BORINGSSL_FIPS) |
| 276 | if (state != &stack_state) { |
| 277 | CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get()); |
| 278 | struct rand_thread_state **states_list = thread_states_list_bss_get(); |
| 279 | state->next = *states_list; |
| 280 | if (state->next != NULL) { |
| 281 | state->next->prev = state; |
| 282 | } |
| 283 | state->prev = NULL; |
| 284 | *states_list = state; |
| 285 | CRYPTO_STATIC_MUTEX_unlock_write(thread_states_list_lock_bss_get()); |
| 286 | } |
| 287 | #endif |
| 288 | } |
| 289 | |
| 290 | if (state->calls >= kReseedInterval) { |
| 291 | uint8_t seed[CTR_DRBG_ENTROPY_LEN]; |
| 292 | rand_get_seed(state, seed); |
| 293 | #if defined(BORINGSSL_FIPS) |
| 294 | // Take a read lock around accesses to |state->drbg|. This is needed to |
| 295 | // avoid returning bad entropy if we race with |
| 296 | // |rand_thread_state_clear_all|. |
| 297 | // |
| 298 | // This lock must be taken after any calls to |CRYPTO_sysrand| to avoid a |
| 299 | // bug on ppc64le. glibc may implement pthread locks by wrapping user code |
| 300 | // in a hardware transaction, but, on some older versions of glibc and the |
| 301 | // kernel, syscalls made with |syscall| did not abort the transaction. |
| 302 | CRYPTO_STATIC_MUTEX_lock_read(thread_states_list_lock_bss_get()); |
| 303 | #endif |
| 304 | if (!CTR_DRBG_reseed(&state->drbg, seed, NULL, 0)) { |
| 305 | abort(); |
| 306 | } |
| 307 | state->calls = 0; |
| 308 | } else { |
| 309 | #if defined(BORINGSSL_FIPS) |
| 310 | CRYPTO_STATIC_MUTEX_lock_read(thread_states_list_lock_bss_get()); |
| 311 | #endif |
| 312 | } |
| 313 | |
| 314 | int first_call = 1; |
| 315 | while (out_len > 0) { |
| 316 | size_t todo = out_len; |
| 317 | if (todo > CTR_DRBG_MAX_GENERATE_LENGTH) { |
| 318 | todo = CTR_DRBG_MAX_GENERATE_LENGTH; |
| 319 | } |
| 320 | |
| 321 | if (!CTR_DRBG_generate(&state->drbg, out, todo, additional_data, |
| 322 | first_call ? sizeof(additional_data) : 0)) { |
| 323 | abort(); |
| 324 | } |
| 325 | |
| 326 | out += todo; |
| 327 | out_len -= todo; |
| 328 | // Though we only check before entering the loop, this cannot add enough to |
| 329 | // overflow a |size_t|. |
| 330 | state->calls++; |
| 331 | first_call = 0; |
| 332 | } |
| 333 | |
| 334 | if (state == &stack_state) { |
| 335 | CTR_DRBG_clear(&state->drbg); |
| 336 | } |
| 337 | |
| 338 | #if defined(BORINGSSL_FIPS) |
| 339 | CRYPTO_STATIC_MUTEX_unlock_read(thread_states_list_lock_bss_get()); |
| 340 | #endif |
| 341 | } |
| 342 | |
// RAND_bytes writes |out_len| random bytes to |out| by calling
// |RAND_bytes_with_additional_data| with all-zero user additional data. It
// always returns one.
int RAND_bytes(uint8_t *out, size_t out_len) {
  static const uint8_t kNoAdditionalData[32] = {0};

  RAND_bytes_with_additional_data(out, out_len, kNoAdditionalData);
  return 1;
}
| 348 | |
// RAND_pseudo_bytes forwards to |RAND_bytes| and returns its result.
int RAND_pseudo_bytes(uint8_t *buf, size_t len) {
  const int ret = RAND_bytes(buf, len);
  return ret;
}
| 352 | |