/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../../internal.h"


#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
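// REDUCE1BIT performs one bit of the GHASH reduction: |V| is shifted right by
// one bit and, if the bit shifted out was set, the reduction constant
// 0xe100000000000000 is folded into the high half.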
#define REDUCE1BIT(V)                                                 \
  do {                                                                \
    if (sizeof(size_t) == 8) {                                        \
      uint64_t T = UINT64_C(0xe100000000000000) & (0 - ((V).lo & 1)); \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ T;                                     \
    } else {                                                          \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1));        \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32);                   \
    }                                                                 \
  } while (0)

// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;

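// gcm_init_4bit fills |Htable| with multiples of |H| for the 4-bit,
// table-driven GHASH. The power-of-two entries are derived from |H| with
// REDUCE1BIT; the remaining entries are XOR combinations of those.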
void gcm_init_4bit(u128 Htable[16], const uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  for (int j = 0; j < 16; ++j) {
    V = Htable[j];
    Htable[j].hi = V.lo;
    Htable[j].lo = V.hi;
  }
#endif
}

#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

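// gcm_gmult_4bit computes Xi = Xi * H in place using |Htable|, consuming |Xi|
// one 4-bit nibble at a time from the last byte to the first.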
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  Xi[0] = CRYPTO_bswap8(Z.hi);
  Xi[1] = CRYPTO_bswap8(Z.lo);
}

// Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
// details. Compiler-generated code doesn't seem to give any performance
// improvement, at least not on x86[_64]. It's here mostly as a reference and a
// placeholder for possible future non-trivial optimizations.
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    Xi[0] = CRYPTO_bswap8(Z.hi);
    Xi[1] = CRYPTO_bswap8(Z.lo);
  } while (inp += 16, len -= 16);
}
#endif  // !GHASH_ASM || AARCH64 || PPC64LE

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->gcm_key.Htable)
#define GHASH(ctx, in, len) \
  gcm_ghash_4bit((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
// GHASH_CHUNK is a "stride parameter" intended to mitigate cache thrashing:
// data is hashed in chunks small enough that the ciphertext is still in the
// L1 cache after the encryption pass.
#define GHASH_CHUNK (3 * 1024)

#if defined(GHASH_ASM_X86_64) || defined(GHASH_ASM_X86)
void gcm_init_ssse3(u128 Htable[16], const uint64_t Xi[2]) {
  // Run the existing 4-bit version.
  gcm_init_4bit(Htable, Xi);

  // First, swap hi and lo. The "4bit" version places hi first. It treats the
  // two fields separately, so the order does not matter, but ghash-ssse3 reads
  // the entire state into one 128-bit register.
  for (int i = 0; i < 16; i++) {
    uint64_t tmp = Htable[i].hi;
    Htable[i].hi = Htable[i].lo;
    Htable[i].lo = tmp;
  }

  // Treat |Htable| as a 16x16 byte table and transpose it. Thus, Htable[i]
  // contains the i'th byte of j*H for all j.
  uint8_t *Hbytes = (uint8_t *)Htable;
  for (int i = 0; i < 16; i++) {
    for (int j = 0; j < i; j++) {
      uint8_t tmp = Hbytes[16 * i + j];
      Hbytes[16 * i + j] = Hbytes[16 * j + i];
      Hbytes[16 * j + i] = tmp;
    }
  }
}
#endif  // GHASH_ASM_X86_64 || GHASH_ASM_X86

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable)
#undef GHASH
#define GHASH(ctx, in, len) \
  (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
#endif  // GCM_FUNCREF_4BIT

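// CRYPTO_ghash_init writes the GHASH key to |out_key| and |out_table| and,
// based on the detected CPU capabilities, selects the fastest available
// gmult/ghash implementation, falling back to the generic 4-bit code.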
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                       u128 *out_key, u128 out_table[16], int *out_is_avx,
                       const uint8_t gcm_key[16]) {
  *out_is_avx = 0;

  union {
    uint64_t u[2];
    uint8_t c[16];
  } H;

  OPENSSL_memcpy(H.c, gcm_key, 16);

  // H is stored in host byte order
  H.u[0] = CRYPTO_bswap8(H.u[0]);
  H.u[1] = CRYPTO_bswap8(H.u[1]);

  OPENSSL_memcpy(out_key, H.c, 16);

#if defined(GHASH_ASM_X86_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) {  // AVX+MOVBE
      gcm_init_avx(out_table, H.u);
      *out_mult = gcm_gmult_avx;
      *out_hash = gcm_ghash_avx;
      *out_is_avx = 1;
      return;
    }
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
  if (gcm_ssse3_capable()) {
    gcm_init_ssse3(out_table, H.u);
    *out_mult = gcm_gmult_ssse3;
    *out_hash = gcm_ghash_ssse3;
    return;
  }
#elif defined(GHASH_ASM_X86)
  if (crypto_gcm_clmul_enabled()) {
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
  if (gcm_ssse3_capable()) {
    gcm_init_ssse3(out_table, H.u);
    *out_mult = gcm_gmult_ssse3;
    *out_hash = gcm_ghash_ssse3;
    return;
  }
#elif defined(GHASH_ASM_ARM)
  if (gcm_pmull_capable()) {
    gcm_init_v8(out_table, H.u);
    *out_mult = gcm_gmult_v8;
    *out_hash = gcm_ghash_v8;
    return;
  }

  if (gcm_neon_capable()) {
    gcm_init_neon(out_table, H.u);
    *out_mult = gcm_gmult_neon;
    *out_hash = gcm_ghash_neon;
    return;
  }
#elif defined(GHASH_ASM_PPC64LE)
  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
    gcm_init_p8(out_table, H.u);
    *out_mult = gcm_gmult_p8;
    *out_hash = gcm_ghash_p8;
    return;
  }
#endif

  gcm_init_4bit(out_table, H.u);
#if defined(GHASH_ASM_X86)
  *out_mult = gcm_gmult_4bit_mmx;
  *out_hash = gcm_ghash_4bit_mmx;
#else
  *out_mult = gcm_gmult_4bit;
  *out_hash = gcm_ghash_4bit;
#endif
}

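// CRYPTO_gcm128_init_key derives the GHASH key by encrypting the all-zero
// block with |aes_key| and sets up the hashing state in |gcm_key|.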
void CRYPTO_gcm128_init_key(GCM128_KEY *gcm_key, const AES_KEY *aes_key,
                            block128_f block, int block_is_hwaes) {
  OPENSSL_memset(gcm_key, 0, sizeof(*gcm_key));
  gcm_key->block = block;

  uint8_t ghash_key[16];
  OPENSSL_memset(ghash_key, 0, sizeof(ghash_key));
  (*block)(ghash_key, ghash_key, aes_key);

  int is_avx;
  CRYPTO_ghash_init(&gcm_key->gmult, &gcm_key->ghash, &gcm_key->H,
                    gcm_key->Htable, &is_avx, ghash_key);

  gcm_key->use_aesni_gcm_crypt = (is_avx && block_is_hwaes) ? 1 : 0;
}

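// A typical caller sequence (sketch): CRYPTO_gcm128_init_key once per key,
// then, per message, CRYPTO_gcm128_setiv, zero or more CRYPTO_gcm128_aad
// calls, the encrypt or decrypt functions below, and finally
// CRYPTO_gcm128_tag or CRYPTO_gcm128_finish.
//
// CRYPTO_gcm128_setiv resets the per-message state and derives the initial
// counter block from |iv|: a 12-byte IV is used directly with a counter of
// one, while other lengths are first hashed with GHASH.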
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const AES_KEY *key,
                         const uint8_t *iv, size_t len) {
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0;  // AAD length
  ctx->len.u[1] = 0;  // message length
  ctx->ares = 0;
  ctx->mres = 0;

  uint32_t ctr;
  if (len == 12) {
    OPENSSL_memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    uint64_t len0 = len;

    while (len >= 16) {
      for (size_t i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (size_t i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);

    GCM_MUL(ctx, Yi);
    ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  }

  (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}

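// CRYPTO_gcm128_aad absorbs |len| bytes of additional authenticated data into
// |Xi|. It may be called multiple times, but only before any data has been
// encrypted or decrypted.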
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  uint64_t alen = ctx->len.u[0] + len;
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  unsigned n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

  // Process a whole number of blocks.
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, aad, len_blocks);
    aad += len_blocks;
    len -= len_blocks;
  }

  // Process the remainder.
  if (len != 0) {
    n = (unsigned int)len;
    for (size_t i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

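// CRYPTO_gcm128_encrypt CTR-encrypts |len| bytes from |in| to |out| one block
// at a time with |ctx->gcm_key.block| and hashes the resulting ciphertext
// into |Xi|.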
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
                          const uint8_t *in, uint8_t *out, size_t len) {
  block128_f block = ctx->gcm_key.block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to encrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    while (len >= 16) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - len_blocks, len_blocks);
  }
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

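// CRYPTO_gcm128_decrypt mirrors CRYPTO_gcm128_encrypt, except the ciphertext
// is hashed into |Xi| before it is decrypted.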
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  block128_f block = ctx->gcm_key.block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to decrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, in, len_blocks);
    while (len >= 16) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

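// CRYPTO_gcm128_encrypt_ctr32 behaves like CRYPTO_gcm128_encrypt but uses
// |stream| to process whole blocks, which lets counter-mode implementations
// handle many blocks per call.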
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to encrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->gcm_key.use_aesni_gcm_crypt) {
    // |aesni_gcm_encrypt| may not process all the input given to it. It may
    // not process *any* of its input if it is deemed too small.
    size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    size_t j = len_blocks / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    in += len_blocks;
    len -= len_blocks;
    GHASH(ctx, out, len_blocks);
    out += len_blocks;
  }
  if (len) {
    (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

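// CRYPTO_gcm128_decrypt_ctr32 is the decryption counterpart of
// CRYPTO_gcm128_encrypt_ctr32: whole blocks are hashed and then decrypted
// with |stream|.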
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to decrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->gcm_key.use_aesni_gcm_crypt) {
    // |aesni_gcm_decrypt| may not process all the input given to it. It may
    // not process *any* of its input if it is deemed too small.
    size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    size_t j = len_blocks / 16;

    GHASH(ctx, in, len_blocks);
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += len_blocks;
    in += len_blocks;
    len -= len_blocks;
  }
  if (len) {
    (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

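// CRYPTO_gcm128_finish folds the AAD and message lengths into |Xi|, XORs in
// |EK0| to form the tag, and compares it against |tag| in constant time,
// returning one if they match.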
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  ctx->Xi.u[0] ^= CRYPTO_bswap8(ctx->len.u[0] << 3);
  ctx->Xi.u[1] ^= CRYPTO_bswap8(ctx->len.u[1] << 3);
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

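// CRYPTO_gcm128_tag computes the final tag and writes at most |len| bytes of
// it to |tag|.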
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  OPENSSL_memcpy(tag, ctx->Xi.c,
                 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  const uint32_t *ia32cap = OPENSSL_ia32cap_get();
  return (ia32cap[0] & (1 << 24)) &&  // check FXSR bit
         (ia32cap[1] & (1 << 1));     // check PCLMULQDQ bit
#else
  return 0;
#endif
}
#endif