1 | /* |
2 | * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. |
3 | * |
4 | * Licensed under the Apache License 2.0 (the "License"). You may not use |
5 | * this file except in compliance with the License. You can obtain a copy |
6 | * in the file LICENSE in the source distribution or at |
7 | * https://www.openssl.org/source/license.html |
8 | */ |
9 | |
10 | #include <stdio.h> |
11 | #include <string.h> |
12 | #include <openssl/opensslconf.h> |
13 | #include <openssl/evp.h> |
14 | #include <openssl/objects.h> |
15 | #include <openssl/aes.h> |
16 | #include <openssl/sha.h> |
17 | #include <openssl/rand.h> |
18 | #include "internal/cryptlib.h" |
19 | #include "crypto/modes.h" |
20 | #include "internal/constant_time.h" |
21 | #include "crypto/evp.h" |
22 | |
23 | typedef struct { |
24 | AES_KEY ks; |
25 | SHA256_CTX head, tail, md; |
26 | size_t payload_length; /* AAD length in decrypt case */ |
27 | union { |
28 | unsigned int tls_ver; |
29 | unsigned char tls_aad[16]; /* 13 used */ |
30 | } aux; |
31 | } EVP_AES_HMAC_SHA256; |
32 | |
33 | # define NO_PAYLOAD_LENGTH ((size_t)-1) |
34 | |
35 | #if defined(AES_ASM) && ( \ |
36 | defined(__x86_64) || defined(__x86_64__) || \ |
37 | defined(_M_AMD64) || defined(_M_X64) ) |
38 | |
/* Bit tested in OPENSSL_ia32cap_P[1] before either cipher is offered. */
# define AESNI_CAPABLE (1<<(57-32))

/*
 * AES key-schedule set-up; defined outside this file.
 * NOTE(review): presumably the AES-NI assembly routines -- confirm.
 * The init callback below treats a negative return as failure.
 */
int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
                          AES_KEY *key);
int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
                          AES_KEY *key);

/*
 * CBC en/decryption of "length" bytes; "enc" is passed as 1 for encryption
 * and 0 for decryption by the callers in this file.
 */
void aesni_cbc_encrypt(const unsigned char *in,
                       unsigned char *out,
                       size_t length,
                       const AES_KEY *key, unsigned char *ivec, int enc);

/*
 * Stitched CBC-encrypt + SHA256 routine.  "blocks" is a count of
 * SHA256_CBLOCK-sized units as computed by the cipher callback.  When it
 * is called with all-NULL/zero arguments the return value is used as an
 * availability probe by the EVP_aes_*_cbc_hmac_sha256() getters.
 */
int aesni_cbc_sha256_enc(const void *inp, void *out, size_t blocks,
                         const AES_KEY *key, unsigned char iv[16],
                         SHA256_CTX *ctx, const void *in0);

/* Fetch this cipher's private state from an EVP_CIPHER_CTX. */
# define data(ctx) ((EVP_AES_HMAC_SHA256 *)EVP_CIPHER_CTX_get_cipher_data(ctx))
56 | |
57 | static int aesni_cbc_hmac_sha256_init_key(EVP_CIPHER_CTX *ctx, |
58 | const unsigned char *inkey, |
59 | const unsigned char *iv, int enc) |
60 | { |
61 | EVP_AES_HMAC_SHA256 *key = data(ctx); |
62 | int ret; |
63 | |
64 | if (enc) |
65 | ret = aesni_set_encrypt_key(inkey, |
66 | EVP_CIPHER_CTX_key_length(ctx) * 8, |
67 | &key->ks); |
68 | else |
69 | ret = aesni_set_decrypt_key(inkey, |
70 | EVP_CIPHER_CTX_key_length(ctx) * 8, |
71 | &key->ks); |
72 | |
73 | SHA256_Init(&key->head); /* handy when benchmarking */ |
74 | key->tail = key->head; |
75 | key->md = key->head; |
76 | |
77 | key->payload_length = NO_PAYLOAD_LENGTH; |
78 | |
79 | return ret < 0 ? 0 : 1; |
80 | } |
81 | |
/*
 * When STITCHED_CALL is defined the cipher callback may hand whole
 * 64-byte blocks to aesni_cbc_sha256_enc() and tracks how far AES has
 * advanced in a local "aes_off".  Without it, aes_off collapses to the
 * constant 0.
 */
# define STITCHED_CALL

# if !defined(STITCHED_CALL)
#  define aes_off 0
# endif

/*
 * Raw SHA256 compression function, defined outside this file.  Callers
 * here pass a count of SHA256_CBLOCK-sized blocks as "len".
 */
void sha256_block_data_order(void *c, const void *p, size_t len);
89 | |
90 | static void sha256_update(SHA256_CTX *c, const void *data, size_t len) |
91 | { |
92 | const unsigned char *ptr = data; |
93 | size_t res; |
94 | |
95 | if ((res = c->num)) { |
96 | res = SHA256_CBLOCK - res; |
97 | if (len < res) |
98 | res = len; |
99 | SHA256_Update(c, ptr, res); |
100 | ptr += res; |
101 | len -= res; |
102 | } |
103 | |
104 | res = len % SHA256_CBLOCK; |
105 | len -= res; |
106 | |
107 | if (len) { |
108 | sha256_block_data_order(c, ptr, len / SHA256_CBLOCK); |
109 | |
110 | ptr += len; |
111 | c->Nh += len >> 29; |
112 | c->Nl += len <<= 3; |
113 | if (c->Nl < (unsigned int)len) |
114 | c->Nh++; |
115 | } |
116 | |
117 | if (res) |
118 | SHA256_Update(c, ptr, res); |
119 | } |
120 | |
/*
 * From this point on every SHA256_Update() in this file resolves to the
 * local sha256_update() defined above (which itself was compiled against
 * the library routine, since it precedes this redefinition).
 */
# ifdef SHA256_Update
#  undef SHA256_Update
# endif
# define SHA256_Update sha256_update
125 | |
126 | # if !defined(OPENSSL_NO_MULTIBLOCK) |
127 | |
/*
 * Hash state for up to eight parallel SHA256 lanes, stored word-sliced:
 * A[i]..H[i] are the eight state words of lane i.
 */
typedef struct {
    unsigned int A[8], B[8], C[8], D[8], E[8], F[8], G[8], H[8];
} SHA256_MB_CTX;
/* One lane of hashing work: start pointer and number of 64-byte blocks. */
typedef struct {
    const unsigned char *ptr;
    int blocks;
} HASH_DESC;

/*
 * Multi-lane SHA256, defined outside this file.  The last argument is
 * passed as n4x (1 or 2) by the caller below.
 */
void sha256_multi_block(SHA256_MB_CTX *, const HASH_DESC *, int);

/*
 * One lane of CBC work: input, output, number of 16-byte blocks (the
 * caller below derives it as byte count / 16) and the chaining IV.
 */
typedef struct {
    const unsigned char *inp;
    unsigned char *out;
    int blocks;
    u64 iv[2];
} CIPH_DESC;

/*
 * Multi-lane AES-CBC encryption, defined outside this file.  The last
 * argument is passed as n4x (1 or 2) by the caller below.
 */
void aesni_multi_cbc_encrypt(CIPH_DESC *, void *, int);
146 | |
/*
 * Encrypt |inp_len| bytes of plaintext as x4 = 4 * n4x independent TLS
 * records, processing all of them in parallel lanes.
 *
 * Every record written to |out| consists of a 5-byte header, a 16-byte
 * explicit IV, the payload fragment, a 32-byte HMAC-SHA256 and CBC padding.
 * The first x4 - 1 records carry |frag| payload bytes each, the final one
 * carries |last| bytes.
 *
 * key->md is expected to hold the 13-byte record header buffered by the
 * EVP_CTRL_TLS1_1_MULTIBLOCK_AAD ctrl (hash state in h[], header bytes in
 * data[]); the sequence number and length fields are patched per lane.
 *
 * Returns the total number of bytes written to |out|, or 0 if the random
 * IVs could not be generated.
 *
 * NOTE(review): the function trusts its caller for n4x being 1 or 2 and for
 * every fragment being at least 64 - 13 bytes long; neither is checked here.
 */
static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
                                         unsigned char *out,
                                         const unsigned char *inp,
                                         size_t inp_len, int n4x)
{                               /* n4x is 1 or 2 */
    HASH_DESC hash_d[8], edges[8];
    CIPH_DESC ciph_d[8];
    /* 32 spare bytes so that ctx can be aligned below */
    unsigned char storage[sizeof(SHA256_MB_CTX) + 32];
    /* per-lane scratch: up to two SHA256 blocks of edge data */
    union {
        u64 q[16];
        u32 d[32];
        u8 c[128];
    } blocks[8];
    SHA256_MB_CTX *ctx;
    unsigned int frag, last, packlen, i, x4 = 4 * n4x, minblocks, processed =
        0;
    size_t ret = 0;
    u8 *IVs;
#  if defined(BSWAP8)
    u64 seqnum;
#  endif

    /* ask for IVs in bulk; blocks[] doubles as temporary IV storage */
    if (RAND_bytes((IVs = blocks[0].c), 16 * x4) <= 0)
        return 0;

    /* align to a 32-byte boundary inside storage[] */
    ctx = (SHA256_MB_CTX *) (storage + 32 - ((size_t)storage % 32));

    /*
     * Split the input: frag = inp_len / x4 for the first x4 - 1 lanes, the
     * last lane takes whatever remains.
     */
    frag = (unsigned int)inp_len >> (1 + n4x);
    last = (unsigned int)inp_len + frag - (frag << (1 + n4x));
    /*
     * Move one byte per leading lane out of the last fragment when its
     * hashed length (payload + 13 header + 9 minimum padding) only just
     * spills into another 64-byte block.
     * NOTE(review): presumably done to keep the lanes' block counts
     * balanced -- confirm.
     */
    if (last > frag && ((last + 13 + 9) % 64) < (x4 - 1)) {
        frag++;
        last -= x4 - 1;
    }

    /* output stride of a full record: header + IV + padded (frag + MAC) */
    packlen = 5 + 16 + ((frag + 32 + 16) & -16);

    /* populate descriptors with pointers and IVs */
    hash_d[0].ptr = inp;
    ciph_d[0].inp = inp;
    /* 5+16 is place for header and explicit IV */
    ciph_d[0].out = out + 5 + 16;
    memcpy(ciph_d[0].out - 16, IVs, 16);
    memcpy(ciph_d[0].iv, IVs, 16);
    IVs += 16;

    for (i = 1; i < x4; i++) {
        ciph_d[i].inp = hash_d[i].ptr = hash_d[i - 1].ptr + frag;
        ciph_d[i].out = ciph_d[i - 1].out + packlen;
        memcpy(ciph_d[i].out - 16, IVs, 16);
        memcpy(ciph_d[i].iv, IVs, 16);
        IVs += 16;
    }

#  if defined(BSWAP8)
    /* first 8 buffered header bytes: big-endian record sequence number */
    memcpy(blocks[0].c, key->md.data, 8);
    seqnum = BSWAP8(blocks[0].q[0]);
#  endif
    for (i = 0; i < x4; i++) {
        unsigned int len = (i == (x4 - 1) ? last : frag);
#  if !defined(BSWAP8)
        unsigned int carry, j;
#  endif

        /* every lane starts from the hash state held in key->md */
        ctx->A[i] = key->md.h[0];
        ctx->B[i] = key->md.h[1];
        ctx->C[i] = key->md.h[2];
        ctx->D[i] = key->md.h[3];
        ctx->E[i] = key->md.h[4];
        ctx->F[i] = key->md.h[5];
        ctx->G[i] = key->md.h[6];
        ctx->H[i] = key->md.h[7];

        /* fix seqnum: lane i uses the base sequence number plus i */
#  if defined(BSWAP8)
        blocks[i].q[0] = BSWAP8(seqnum + i);
#  else
        /* byte-wise big-endian addition, least significant byte first */
        for (carry = i, j = 8; j--;) {
            blocks[i].c[j] = ((u8 *)key->md.data)[j] + carry;
            carry = (blocks[i].c[j] - carry) >> (sizeof(carry) * 8 - 1);
        }
#  endif
        /* header bytes 8..10 are copied unchanged */
        blocks[i].c[8] = ((u8 *)key->md.data)[8];
        blocks[i].c[9] = ((u8 *)key->md.data)[9];
        blocks[i].c[10] = ((u8 *)key->md.data)[10];
        /* fix length */
        blocks[i].c[11] = (u8)(len >> 8);
        blocks[i].c[12] = (u8)(len);

        /* fill the rest of the first 64-byte block with payload */
        memcpy(blocks[i].c + 13, hash_d[i].ptr, 64 - 13);
        hash_d[i].ptr += 64 - 13;
        /* whole payload blocks still to be hashed for this lane */
        hash_d[i].blocks = (len - (64 - 13)) / 64;

        edges[i].ptr = blocks[i].c;
        edges[i].blocks = 1;
    }

    /* hash 13-byte headers and first 64-13 bytes of inputs */
    sha256_multi_block(ctx, edges, n4x);
    /* hash bulk inputs */
#  define MAXCHUNKSIZE    2048
#  if     MAXCHUNKSIZE%64
#   error  "MAXCHUNKSIZE is not divisible by 64"
#  elif   MAXCHUNKSIZE
    /*
     * goal is to minimize pressure on L1 cache by moving in shorter steps,
     * so that hashed data is still in the cache by the time we encrypt it
     */
    minblocks = ((frag <= last ? frag : last) - (64 - 13)) / 64;
    if (minblocks > MAXCHUNKSIZE / 64) {
        for (i = 0; i < x4; i++) {
            edges[i].ptr = hash_d[i].ptr;
            edges[i].blocks = MAXCHUNKSIZE / 64;
            ciph_d[i].blocks = MAXCHUNKSIZE / 16;
        }
        do {
            /* hash one chunk per lane, then encrypt one chunk per lane */
            sha256_multi_block(ctx, edges, n4x);
            aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x);

            for (i = 0; i < x4; i++) {
                edges[i].ptr = hash_d[i].ptr += MAXCHUNKSIZE;
                hash_d[i].blocks -= MAXCHUNKSIZE / 64;
                edges[i].blocks = MAXCHUNKSIZE / 64;
                ciph_d[i].inp += MAXCHUNKSIZE;
                ciph_d[i].out += MAXCHUNKSIZE;
                ciph_d[i].blocks = MAXCHUNKSIZE / 16;
                /* chain: next IV is the last ciphertext block just written */
                memcpy(ciph_d[i].iv, ciph_d[i].out - 16, 16);
            }
            processed += MAXCHUNKSIZE;
            minblocks -= MAXCHUNKSIZE / 64;
        } while (minblocks > MAXCHUNKSIZE / 64);
    }
#  endif
#  undef  MAXCHUNKSIZE
    /* hash the whole blocks that were not covered by the chunked loop */
    sha256_multi_block(ctx, hash_d, n4x);

    /* build the final inner-hash block(s): tail bytes, 0x80, bit length */
    memset(blocks, 0, sizeof(blocks));
    for (i = 0; i < x4; i++) {
        unsigned int len = (i == (x4 - 1) ? last : frag),
            off = hash_d[i].blocks * 64;
        const unsigned char *ptr = hash_d[i].ptr + off;

        off = (len - processed) - (64 - 13) - off; /* remainder actually */
        memcpy(blocks[i].c, ptr, off);
        blocks[i].c[off] = 0x80;
        len += 64 + 13;         /* 64 is HMAC header */
        len *= 8;               /* convert to bits */
        /* length fits in the same block only if 8 bytes remain free */
        if (off < (64 - 8)) {
#  ifdef BSWAP4
            blocks[i].d[15] = BSWAP4(len);
#  else
            PUTU32(blocks[i].c + 60, len);
#  endif
            edges[i].blocks = 1;
        } else {
#  ifdef BSWAP4
            blocks[i].d[31] = BSWAP4(len);
#  else
            PUTU32(blocks[i].c + 124, len);
#  endif
            edges[i].blocks = 2;
        }
        edges[i].ptr = blocks[i].c;
    }

    /* hash input tails and finalize */
    sha256_multi_block(ctx, edges, n4x);

    /*
     * Outer HMAC pass: the message is the 32-byte inner digest (stored
     * big-endian), padded to one block; the lanes restart from key->tail.
     */
    memset(blocks, 0, sizeof(blocks));
    for (i = 0; i < x4; i++) {
#  ifdef BSWAP4
        blocks[i].d[0] = BSWAP4(ctx->A[i]);
        ctx->A[i] = key->tail.h[0];
        blocks[i].d[1] = BSWAP4(ctx->B[i]);
        ctx->B[i] = key->tail.h[1];
        blocks[i].d[2] = BSWAP4(ctx->C[i]);
        ctx->C[i] = key->tail.h[2];
        blocks[i].d[3] = BSWAP4(ctx->D[i]);
        ctx->D[i] = key->tail.h[3];
        blocks[i].d[4] = BSWAP4(ctx->E[i]);
        ctx->E[i] = key->tail.h[4];
        blocks[i].d[5] = BSWAP4(ctx->F[i]);
        ctx->F[i] = key->tail.h[5];
        blocks[i].d[6] = BSWAP4(ctx->G[i]);
        ctx->G[i] = key->tail.h[6];
        blocks[i].d[7] = BSWAP4(ctx->H[i]);
        ctx->H[i] = key->tail.h[7];
        blocks[i].c[32] = 0x80;
        /* bit length: 64-byte opad block + 32-byte inner digest */
        blocks[i].d[15] = BSWAP4((64 + 32) * 8);
#  else
        PUTU32(blocks[i].c + 0, ctx->A[i]);
        ctx->A[i] = key->tail.h[0];
        PUTU32(blocks[i].c + 4, ctx->B[i]);
        ctx->B[i] = key->tail.h[1];
        PUTU32(blocks[i].c + 8, ctx->C[i]);
        ctx->C[i] = key->tail.h[2];
        PUTU32(blocks[i].c + 12, ctx->D[i]);
        ctx->D[i] = key->tail.h[3];
        PUTU32(blocks[i].c + 16, ctx->E[i]);
        ctx->E[i] = key->tail.h[4];
        PUTU32(blocks[i].c + 20, ctx->F[i]);
        ctx->F[i] = key->tail.h[5];
        PUTU32(blocks[i].c + 24, ctx->G[i]);
        ctx->G[i] = key->tail.h[6];
        PUTU32(blocks[i].c + 28, ctx->H[i]);
        ctx->H[i] = key->tail.h[7];
        blocks[i].c[32] = 0x80;
        /* bit length: 64-byte opad block + 32-byte inner digest */
        PUTU32(blocks[i].c + 60, (64 + 32) * 8);
#  endif
        edges[i].ptr = blocks[i].c;
        edges[i].blocks = 1;
    }

    /* finalize MACs */
    sha256_multi_block(ctx, edges, n4x);

    /* lay out each record: remaining payload, MAC, padding, header */
    for (i = 0; i < x4; i++) {
        unsigned int len = (i == (x4 - 1) ? last : frag), pad, j;
        unsigned char *out0 = out; /* start of this record's header */

        /*
         * Copy the plaintext that the chunked loop has not encrypted yet;
         * from here on the lane is encrypted in place.
         */
        memcpy(ciph_d[i].out, ciph_d[i].inp, len - processed);
        ciph_d[i].inp = ciph_d[i].out;

        /* skip header, explicit IV and payload */
        out += 5 + 16 + len;

        /* write MAC */
        PUTU32(out + 0, ctx->A[i]);
        PUTU32(out + 4, ctx->B[i]);
        PUTU32(out + 8, ctx->C[i]);
        PUTU32(out + 12, ctx->D[i]);
        PUTU32(out + 16, ctx->E[i]);
        PUTU32(out + 20, ctx->F[i]);
        PUTU32(out + 24, ctx->G[i]);
        PUTU32(out + 28, ctx->H[i]);
        out += 32;
        len += 32;

        /* pad: pad + 1 bytes, each holding the value pad */
        pad = 15 - len % 16;
        for (j = 0; j <= pad; j++)
            *(out++) = pad;
        len += pad + 1;

        /* 16-byte blocks still to be encrypted for this lane */
        ciph_d[i].blocks = (len - processed) / 16;
        len += 16;              /* account for explicit iv */

        /* arrange header */
        out0[0] = ((u8 *)key->md.data)[8];
        out0[1] = ((u8 *)key->md.data)[9];
        out0[2] = ((u8 *)key->md.data)[10];
        out0[3] = (u8)(len >> 8);
        out0[4] = (u8)(len);

        ret += len + 5;
        inp += frag;
    }

    /* encrypt whatever is left in every lane (payload tail, MAC, padding) */
    aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x);

    /* scrub intermediate hash material from the stack */
    OPENSSL_cleanse(blocks, sizeof(blocks));
    OPENSSL_cleanse(ctx, sizeof(*ctx));

    return ret;
}
412 | # endif |
413 | |
/*
 * Cipher callback: process |len| bytes from |in| to |out|.
 *
 * |len| must be a multiple of AES_BLOCK_SIZE, otherwise 0 is returned.
 * The pending payload length (set by the EVP_CTRL_AEAD_TLS1_AAD ctrl) is
 * consumed by every call.
 *
 * Encrypting:
 *  - no AAD pending: the data is hashed into key->md and CBC-encrypted;
 *  - AAD pending ("TLS" mode): |len| must equal the payload length plus
 *    MAC and padding; the HMAC and padding are appended and everything is
 *    encrypted.
 * Decrypting:
 *  - the data is CBC-decrypted first;
 *  - AAD pending ("TLS" mode): MAC and padding are verified with
 *    data-independent control flow; returns 1 if both are valid, 0
 *    otherwise;
 *  - no AAD pending: the plaintext is hashed into key->md.
 *
 * Returns 1 on success, 0 on failure.
 */
static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx,
                                        unsigned char *out,
                                        const unsigned char *in, size_t len)
{
    EVP_AES_HMAC_SHA256 *key = data(ctx);
    unsigned int l;
    size_t plen = key->payload_length, iv = 0, /* explicit IV in TLS 1.1 and
                                                * later */
        sha_off = 0;
# if defined(STITCHED_CALL)
    size_t aes_off = 0, blocks;

    /* bytes needed to complete the block currently buffered in key->md */
    sha_off = SHA256_CBLOCK - key->md.num;
# endif

    /* the pending AAD applies to this call only */
    key->payload_length = NO_PAYLOAD_LENGTH;

    if (len % AES_BLOCK_SIZE)
        return 0;

    if (EVP_CIPHER_CTX_encrypting(ctx)) {
        if (plen == NO_PAYLOAD_LENGTH)
            plen = len;
        /* TLS mode: len must be payload + MAC rounded up with padding */
        else if (len !=
                 ((plen + SHA256_DIGEST_LENGTH +
                   AES_BLOCK_SIZE) & -AES_BLOCK_SIZE))
            return 0;
        else if (key->aux.tls_ver >= TLS1_1_VERSION)
            iv = AES_BLOCK_SIZE;
        /*
         * NOTE(review): nothing here checks plen >= iv; the AAD ctrl
         * rejects such lengths, but a caller ignoring that result would
         * reach "plen - sha_off" below with plen < sha_off.
         */

# if defined(STITCHED_CALL)
        /*
         * Assembly stitch handles AVX-capable processors, but its
         * performance is not optimal on AMD Jaguar, ~40% worse, for
         * unknown reasons. Incidentally processor in question supports
         * AVX, but not AMD-specific XOP extension, which can be used
         * to identify it and avoid stitch invocation. So that after we
         * establish that current CPU supports AVX, we even see if it's
         * either even XOP-capable Bulldozer-based or GenuineIntel one.
         * But SHAEXT-capable go ahead...
         */
        if (((OPENSSL_ia32cap_P[2] & (1 << 29)) ||         /* SHAEXT? */
             ((OPENSSL_ia32cap_P[1] & (1 << (60 - 32))) && /* AVX? */
              ((OPENSSL_ia32cap_P[1] & (1 << (43 - 32)))   /* XOP? */
               | (OPENSSL_ia32cap_P[0] & (1 << 30))))) &&  /* "Intel CPU"? */
            plen > (sha_off + iv) &&
            (blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) {
            /* bring the hash to a block boundary first */
            SHA256_Update(&key->md, in + iv, sha_off);

            /* encrypt and hash "blocks" 64-byte blocks in one pass */
            (void)aesni_cbc_sha256_enc(in, out, blocks, &key->ks,
                                       EVP_CIPHER_CTX_iv_noconst(ctx),
                                       &key->md, in + iv + sha_off);
            blocks *= SHA256_CBLOCK;
            aes_off += blocks;
            sha_off += blocks;
            /* account for the stitched bytes in the hash bit counter */
            key->md.Nh += blocks >> 29;
            key->md.Nl += blocks <<= 3;
            if (key->md.Nl < (unsigned int)blocks)
                key->md.Nh++;
        } else {
            sha_off = 0;
        }
# endif
        /* hash what the stitched call did not cover (skipping the IV) */
        sha_off += iv;
        SHA256_Update(&key->md, in + sha_off, plen - sha_off);

        if (plen != len) {      /* "TLS" mode of operation */
            if (in != out)
                memcpy(out + aes_off, in + aes_off, plen - aes_off);

            /* calculate HMAC and append it to payload */
            SHA256_Final(out + plen, &key->md);
            key->md = key->tail;
            SHA256_Update(&key->md, out + plen, SHA256_DIGEST_LENGTH);
            SHA256_Final(out + plen, &key->md);

            /* pad the payload|hmac */
            plen += SHA256_DIGEST_LENGTH;
            for (l = len - plen - 1; plen < len; plen++)
                out[plen] = l;
            /* encrypt HMAC|padding at once */
            aesni_cbc_encrypt(out + aes_off, out + aes_off, len - aes_off,
                              &key->ks, EVP_CIPHER_CTX_iv_noconst(ctx), 1);
        } else {
            /* plain mode: encrypt whatever the stitched call left over */
            aesni_cbc_encrypt(in + aes_off, out + aes_off, len - aes_off,
                              &key->ks, EVP_CIPHER_CTX_iv_noconst(ctx), 1);
        }
    } else {
        /* buffer for the computed MAC, oversized to allow alignment */
        union {
            unsigned int u[SHA256_DIGEST_LENGTH / sizeof(unsigned int)];
            unsigned char c[64 + SHA256_DIGEST_LENGTH];
        } mac, *pmac;

        /* arrange cache line alignment */
        pmac = (void *)(((size_t)mac.c + 63) & ((size_t)0 - 64));

        /* decrypt HMAC|padding at once */
        aesni_cbc_encrypt(in, out, len, &key->ks,
                          EVP_CIPHER_CTX_iv_noconst(ctx), 0);

        if (plen != NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */
            size_t inp_len, mask, j, i;
            unsigned int res, maxpad, pad, bitlen;
            int ret = 1;
            /* byte/word view of the hash context's block buffer */
            union {
                unsigned int u[SHA_LBLOCK];
                unsigned char c[SHA256_CBLOCK];
            } *data = (void *)key->md.data;

            /* protocol version sits 4 and 3 bytes before the AAD's end */
            if ((key->aux.tls_aad[plen - 4] << 8 | key->aux.tls_aad[plen - 3])
                >= TLS1_1_VERSION)
                iv = AES_BLOCK_SIZE;

            /* need room for IV, MAC and at least the padding length byte */
            if (len < (iv + SHA256_DIGEST_LENGTH + 1))
                return 0;

            /* omit explicit iv */
            out += iv;
            len -= iv;

            /* figure out payload length */
            pad = out[len - 1];
            /* largest padding the record could hold, capped at 255 */
            maxpad = len - (SHA256_DIGEST_LENGTH + 1);
            maxpad |= (255 - maxpad) >> (sizeof(maxpad) * 8 - 8);
            maxpad &= 255;

            mask = constant_time_ge(maxpad, pad);
            ret &= mask;
            /*
             * If pad is invalid then we will fail the above test but we must
             * continue anyway because we are in constant time code. However,
             * we'll use the maxpad value instead of the supplied pad to make
             * sure we perform well defined pointer arithmetic.
             */
            pad = constant_time_select(mask, pad, maxpad);

            inp_len = len - (SHA256_DIGEST_LENGTH + pad + 1);

            /* write the recovered payload length into the AAD copy */
            key->aux.tls_aad[plen - 2] = inp_len >> 8;
            key->aux.tls_aad[plen - 1] = inp_len;

            /* calculate HMAC */
            key->md = key->head;
            SHA256_Update(&key->md, key->aux.tls_aad, plen);

# if 1      /* see original reference version in #else */
            len -= SHA256_DIGEST_LENGTH; /* amend mac */
            /*
             * Hash the leading part of the record in the ordinary way,
             * leaving at least 256 + SHA256_CBLOCK bytes for the
             * data-independent loop below.
             */
            if (len >= (256 + SHA256_CBLOCK)) {
                j = (len - (256 + SHA256_CBLOCK)) & (0 - SHA256_CBLOCK);
                j += SHA256_CBLOCK - key->md.num;
                SHA256_Update(&key->md, out, j);
                out += j;
                len -= j;
                inp_len -= j;
            }

            /* but pretend as if we hashed padded payload */
            bitlen = key->md.Nl + (inp_len << 3); /* at most 18 bits */
            /* store the bit length in big-endian byte order */
#  ifdef BSWAP4
            bitlen = BSWAP4(bitlen);
#  else
            mac.c[0] = 0;
            mac.c[1] = (unsigned char)(bitlen >> 16);
            mac.c[2] = (unsigned char)(bitlen >> 8);
            mac.c[3] = (unsigned char)bitlen;
            bitlen = mac.u[0];
#  endif

            /* pmac accumulates (by OR) the digest of the true final block */
            pmac->u[0] = 0;
            pmac->u[1] = 0;
            pmac->u[2] = 0;
            pmac->u[3] = 0;
            pmac->u[4] = 0;
            pmac->u[5] = 0;
            pmac->u[6] = 0;
            pmac->u[7] = 0;

            /*
             * Walk over all remaining bytes regardless of where the
             * payload really ends, building hash blocks by masking.
             */
            for (res = key->md.num, j = 0; j < len; j++) {
                size_t c = out[j];
                /* low byte of mask is 0xff while j < inp_len, else 0 */
                mask = (j - inp_len) >> (sizeof(j) * 8 - 8);
                c &= mask;
                /* place the 0x80 terminator exactly at j == inp_len */
                c |= 0x80 & ~mask & ~((inp_len - j) >> (sizeof(j) * 8 - 8));
                data->c[res++] = (unsigned char)c;

                if (res != SHA256_CBLOCK)
                    continue;

                /* j is not incremented yet */
                /* all-ones once the length field belongs in this block */
                mask = 0 - ((inp_len + 7 - j) >> (sizeof(j) * 8 - 1));
                data->u[SHA_LBLOCK - 1] |= bitlen & mask;
                sha256_block_data_order(&key->md, data, 1);
                /* keep the state only if this was the real final block */
                mask &= 0 - ((j - inp_len - 72) >> (sizeof(j) * 8 - 1));
                pmac->u[0] |= key->md.h[0] & mask;
                pmac->u[1] |= key->md.h[1] & mask;
                pmac->u[2] |= key->md.h[2] & mask;
                pmac->u[3] |= key->md.h[3] & mask;
                pmac->u[4] |= key->md.h[4] & mask;
                pmac->u[5] |= key->md.h[5] & mask;
                pmac->u[6] |= key->md.h[6] & mask;
                pmac->u[7] |= key->md.h[7] & mask;
                res = 0;
            }

            /* zero-fill the rest of the partially built block */
            for (i = res; i < SHA256_CBLOCK; i++, j++)
                data->c[i] = 0;

            /* no room left for the 8-byte length: one more block needed */
            if (res > SHA256_CBLOCK - 8) {
                mask = 0 - ((inp_len + 8 - j) >> (sizeof(j) * 8 - 1));
                data->u[SHA_LBLOCK - 1] |= bitlen & mask;
                sha256_block_data_order(&key->md, data, 1);
                mask &= 0 - ((j - inp_len - 73) >> (sizeof(j) * 8 - 1));
                pmac->u[0] |= key->md.h[0] & mask;
                pmac->u[1] |= key->md.h[1] & mask;
                pmac->u[2] |= key->md.h[2] & mask;
                pmac->u[3] |= key->md.h[3] & mask;
                pmac->u[4] |= key->md.h[4] & mask;
                pmac->u[5] |= key->md.h[5] & mask;
                pmac->u[6] |= key->md.h[6] & mask;
                pmac->u[7] |= key->md.h[7] & mask;

                memset(data, 0, SHA256_CBLOCK);
                j += 64;
            }
            /* last candidate block always carries the length */
            data->u[SHA_LBLOCK - 1] = bitlen;
            sha256_block_data_order(&key->md, data, 1);
            mask = 0 - ((j - inp_len - 73) >> (sizeof(j) * 8 - 1));
            pmac->u[0] |= key->md.h[0] & mask;
            pmac->u[1] |= key->md.h[1] & mask;
            pmac->u[2] |= key->md.h[2] & mask;
            pmac->u[3] |= key->md.h[3] & mask;
            pmac->u[4] |= key->md.h[4] & mask;
            pmac->u[5] |= key->md.h[5] & mask;
            pmac->u[6] |= key->md.h[6] & mask;
            pmac->u[7] |= key->md.h[7] & mask;

            /* serialise the inner digest words in big-endian order */
#  ifdef BSWAP4
            pmac->u[0] = BSWAP4(pmac->u[0]);
            pmac->u[1] = BSWAP4(pmac->u[1]);
            pmac->u[2] = BSWAP4(pmac->u[2]);
            pmac->u[3] = BSWAP4(pmac->u[3]);
            pmac->u[4] = BSWAP4(pmac->u[4]);
            pmac->u[5] = BSWAP4(pmac->u[5]);
            pmac->u[6] = BSWAP4(pmac->u[6]);
            pmac->u[7] = BSWAP4(pmac->u[7]);
#  else
            for (i = 0; i < 8; i++) {
                res = pmac->u[i];
                pmac->c[4 * i + 0] = (unsigned char)(res >> 24);
                pmac->c[4 * i + 1] = (unsigned char)(res >> 16);
                pmac->c[4 * i + 2] = (unsigned char)(res >> 8);
                pmac->c[4 * i + 3] = (unsigned char)res;
            }
#  endif
            len += SHA256_DIGEST_LENGTH;
# else
            /*
             * NOTE(review): disabled reference code; it calls
             * sha1_block_data_order(), which is not declared in this file.
             */
            SHA256_Update(&key->md, out, inp_len);
            res = key->md.num;
            SHA256_Final(pmac->c, &key->md);

            {
                unsigned int inp_blocks, pad_blocks;

                /* but pretend as if we hashed padded payload */
                inp_blocks =
                    1 + ((SHA256_CBLOCK - 9 - res) >> (sizeof(res) * 8 - 1));
                res += (unsigned int)(len - inp_len);
                pad_blocks = res / SHA256_CBLOCK;
                res %= SHA256_CBLOCK;
                pad_blocks +=
                    1 + ((SHA256_CBLOCK - 9 - res) >> (sizeof(res) * 8 - 1));
                for (; inp_blocks < pad_blocks; inp_blocks++)
                    sha1_block_data_order(&key->md, data, 1);
            }
# endif      /* pre-lucky-13 reference version of above */
            /* outer HMAC pass over the inner digest */
            key->md = key->tail;
            SHA256_Update(&key->md, pmac->c, SHA256_DIGEST_LENGTH);
            SHA256_Final(pmac->c, &key->md);

            /* verify HMAC */
            out += inp_len;
            len -= inp_len;
# if 1      /* see original reference version in #else */
            {
                /*
                 * Scan a window of fixed size (maxpad + MAC length) that
                 * ends at the last padding byte, so the work done does not
                 * depend on the actual padding length.
                 */
                unsigned char *p =
                    out + len - 1 - maxpad - SHA256_DIGEST_LENGTH;
                size_t off = out - p; /* offset of the MAC in the window */
                unsigned int c, cmask;

                maxpad += SHA256_DIGEST_LENGTH;
                for (res = 0, i = 0, j = 0; j < maxpad; j++) {
                    c = p[j];
                    /* all-ones while j is before the end of the MAC */
                    cmask =
                        ((int)(j - off - SHA256_DIGEST_LENGTH)) >>
                        (sizeof(int) * 8 - 1);
                    res |= (c ^ pad) & ~cmask; /* ... and padding */
                    /* narrow to bytes at or after the MAC's start */
                    cmask &= ((int)(off - 1 - j)) >> (sizeof(int) * 8 - 1);
                    res |= (c ^ pmac->c[i]) & cmask;
                    i += 1 & cmask;
                }
                maxpad -= SHA256_DIGEST_LENGTH;

                /* res becomes all-ones if any mismatch was recorded */
                res = 0 - ((0 - res) >> (sizeof(res) * 8 - 1));
                ret &= (int)~res;
            }
# else      /* pre-lucky-13 reference version of above */
            for (res = 0, i = 0; i < SHA256_DIGEST_LENGTH; i++)
                res |= out[i] ^ pmac->c[i];
            res = 0 - ((0 - res) >> (sizeof(res) * 8 - 1));
            ret &= (int)~res;

            /* verify padding */
            pad = (pad & ~res) | (maxpad & res);
            out = out + len - 1 - pad;
            for (res = 0, i = 0; i < pad; i++)
                res |= out[i] ^ pad;

            res = (0 - res) >> (sizeof(res) * 8 - 1);
            ret &= (int)~res;
# endif
            return ret;
        } else {
            /* plain mode: just hash the decrypted data */
            SHA256_Update(&key->md, out, len);
        }
    }

    return 1;
}
741 | |
742 | static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, |
743 | void *ptr) |
744 | { |
745 | EVP_AES_HMAC_SHA256 *key = data(ctx); |
746 | unsigned int u_arg = (unsigned int)arg; |
747 | |
748 | switch (type) { |
749 | case EVP_CTRL_AEAD_SET_MAC_KEY: |
750 | { |
751 | unsigned int i; |
752 | unsigned char hmac_key[64]; |
753 | |
754 | memset(hmac_key, 0, sizeof(hmac_key)); |
755 | |
756 | if (arg < 0) |
757 | return -1; |
758 | |
759 | if (u_arg > sizeof(hmac_key)) { |
760 | SHA256_Init(&key->head); |
761 | SHA256_Update(&key->head, ptr, arg); |
762 | SHA256_Final(hmac_key, &key->head); |
763 | } else { |
764 | memcpy(hmac_key, ptr, arg); |
765 | } |
766 | |
767 | for (i = 0; i < sizeof(hmac_key); i++) |
768 | hmac_key[i] ^= 0x36; /* ipad */ |
769 | SHA256_Init(&key->head); |
770 | SHA256_Update(&key->head, hmac_key, sizeof(hmac_key)); |
771 | |
772 | for (i = 0; i < sizeof(hmac_key); i++) |
773 | hmac_key[i] ^= 0x36 ^ 0x5c; /* opad */ |
774 | SHA256_Init(&key->tail); |
775 | SHA256_Update(&key->tail, hmac_key, sizeof(hmac_key)); |
776 | |
777 | OPENSSL_cleanse(hmac_key, sizeof(hmac_key)); |
778 | |
779 | return 1; |
780 | } |
781 | case EVP_CTRL_AEAD_TLS1_AAD: |
782 | { |
783 | unsigned char *p = ptr; |
784 | unsigned int len; |
785 | |
786 | if (arg != EVP_AEAD_TLS1_AAD_LEN) |
787 | return -1; |
788 | |
789 | len = p[arg - 2] << 8 | p[arg - 1]; |
790 | |
791 | if (EVP_CIPHER_CTX_encrypting(ctx)) { |
792 | key->payload_length = len; |
793 | if ((key->aux.tls_ver = |
794 | p[arg - 4] << 8 | p[arg - 3]) >= TLS1_1_VERSION) { |
795 | if (len < AES_BLOCK_SIZE) |
796 | return 0; |
797 | len -= AES_BLOCK_SIZE; |
798 | p[arg - 2] = len >> 8; |
799 | p[arg - 1] = len; |
800 | } |
801 | key->md = key->head; |
802 | SHA256_Update(&key->md, p, arg); |
803 | |
804 | return (int)(((len + SHA256_DIGEST_LENGTH + |
805 | AES_BLOCK_SIZE) & -AES_BLOCK_SIZE) |
806 | - len); |
807 | } else { |
808 | memcpy(key->aux.tls_aad, ptr, arg); |
809 | key->payload_length = arg; |
810 | |
811 | return SHA256_DIGEST_LENGTH; |
812 | } |
813 | } |
814 | # if !defined(OPENSSL_NO_MULTIBLOCK) |
815 | case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE: |
816 | return (int)(5 + 16 + ((arg + 32 + 16) & -16)); |
817 | case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD: |
818 | { |
819 | EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = |
820 | (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr; |
821 | unsigned int n4x = 1, x4; |
822 | unsigned int frag, last, packlen, inp_len; |
823 | |
824 | if (arg < 0) |
825 | return -1; |
826 | |
827 | if (u_arg < sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) |
828 | return -1; |
829 | |
830 | inp_len = param->inp[11] << 8 | param->inp[12]; |
831 | |
832 | if (EVP_CIPHER_CTX_encrypting(ctx)) { |
833 | if ((param->inp[9] << 8 | param->inp[10]) < TLS1_1_VERSION) |
834 | return -1; |
835 | |
836 | if (inp_len) { |
837 | if (inp_len < 4096) |
838 | return 0; /* too short */ |
839 | |
840 | if (inp_len >= 8192 && OPENSSL_ia32cap_P[2] & (1 << 5)) |
841 | n4x = 2; /* AVX2 */ |
842 | } else if ((n4x = param->interleave / 4) && n4x <= 2) |
843 | inp_len = param->len; |
844 | else |
845 | return -1; |
846 | |
847 | key->md = key->head; |
848 | SHA256_Update(&key->md, param->inp, 13); |
849 | |
850 | x4 = 4 * n4x; |
851 | n4x += 1; |
852 | |
853 | frag = inp_len >> n4x; |
854 | last = inp_len + frag - (frag << n4x); |
855 | if (last > frag && ((last + 13 + 9) % 64 < (x4 - 1))) { |
856 | frag++; |
857 | last -= x4 - 1; |
858 | } |
859 | |
860 | packlen = 5 + 16 + ((frag + 32 + 16) & -16); |
861 | packlen = (packlen << n4x) - packlen; |
862 | packlen += 5 + 16 + ((last + 32 + 16) & -16); |
863 | |
864 | param->interleave = x4; |
865 | |
866 | return (int)packlen; |
867 | } else |
868 | return -1; /* not yet */ |
869 | } |
870 | case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT: |
871 | { |
872 | EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = |
873 | (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr; |
874 | |
875 | return (int)tls1_1_multi_block_encrypt(key, param->out, |
876 | param->inp, param->len, |
877 | param->interleave / 4); |
878 | } |
879 | case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT: |
880 | # endif |
881 | default: |
882 | return -1; |
883 | } |
884 | } |
885 | |
/*
 * Method table for the AES-128 variant.  The initialiser is positional;
 * NOTE(review): the three sizes are presumably block size, key length and
 * IV length -- confirm against the EVP_CIPHER definition.
 */
static EVP_CIPHER aesni_128_cbc_hmac_sha256_cipher = {
# ifdef NID_aes_128_cbc_hmac_sha256
    NID_aes_128_cbc_hmac_sha256,
# else
    NID_undef,
# endif
    AES_BLOCK_SIZE, 16, AES_BLOCK_SIZE,
    EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 |
        EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
    aesni_cbc_hmac_sha256_init_key,
    aesni_cbc_hmac_sha256_cipher,
    NULL,
    sizeof(EVP_AES_HMAC_SHA256),
    /* custom ASN.1 IV handlers are only installed if the flag is zero */
    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_set_asn1_iv,
    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_get_asn1_iv,
    aesni_cbc_hmac_sha256_ctrl,
    NULL
};
904 | |
/*
 * Method table for the AES-256 variant; it differs from the 128-bit table
 * only in the NID and in the second size field (32 instead of 16).
 * NOTE(review): the three sizes are presumably block size, key length and
 * IV length -- confirm against the EVP_CIPHER definition.
 */
static EVP_CIPHER aesni_256_cbc_hmac_sha256_cipher = {
# ifdef NID_aes_256_cbc_hmac_sha256
    NID_aes_256_cbc_hmac_sha256,
# else
    NID_undef,
# endif
    AES_BLOCK_SIZE, 32, AES_BLOCK_SIZE,
    EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 |
        EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
    aesni_cbc_hmac_sha256_init_key,
    aesni_cbc_hmac_sha256_cipher,
    NULL,
    sizeof(EVP_AES_HMAC_SHA256),
    /* custom ASN.1 IV handlers are only installed if the flag is zero */
    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_set_asn1_iv,
    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_get_asn1_iv,
    aesni_cbc_hmac_sha256_ctrl,
    NULL
};
923 | |
924 | const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void) |
925 | { |
926 | return ((OPENSSL_ia32cap_P[1] & AESNI_CAPABLE) && |
927 | aesni_cbc_sha256_enc(NULL, NULL, 0, NULL, NULL, NULL, NULL) ? |
928 | &aesni_128_cbc_hmac_sha256_cipher : NULL); |
929 | } |
930 | |
931 | const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void) |
932 | { |
933 | return ((OPENSSL_ia32cap_P[1] & AESNI_CAPABLE) && |
934 | aesni_cbc_sha256_enc(NULL, NULL, 0, NULL, NULL, NULL, NULL) ? |
935 | &aesni_256_cbc_hmac_sha256_cipher : NULL); |
936 | } |
937 | #else |
/*
 * Fallback for builds without AES_ASM on x86-64: the stitched cipher is
 * not available, so callers always get NULL.
 */
const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void)
{
    return NULL;
}
942 | |
/*
 * Fallback for builds without AES_ASM on x86-64: the stitched cipher is
 * not available, so callers always get NULL.
 */
const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void)
{
    return NULL;
}
947 | #endif |
948 | |