1 | /* ==================================================================== |
2 | * Copyright (c) 2008 The OpenSSL Project. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * |
8 | * 1. Redistributions of source code must retain the above copyright |
9 | * notice, this list of conditions and the following disclaimer. |
10 | * |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in |
13 | * the documentation and/or other materials provided with the |
14 | * distribution. |
15 | * |
16 | * 3. All advertising materials mentioning features or use of this |
17 | * software must display the following acknowledgment: |
18 | * "This product includes software developed by the OpenSSL Project |
19 | * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" |
20 | * |
21 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
22 | * endorse or promote products derived from this software without |
23 | * prior written permission. For written permission, please contact |
24 | * openssl-core@openssl.org. |
25 | * |
26 | * 5. Products derived from this software may not be called "OpenSSL" |
27 | * nor may "OpenSSL" appear in their names without prior written |
28 | * permission of the OpenSSL Project. |
29 | * |
30 | * 6. Redistributions of any form whatsoever must retain the following |
31 | * acknowledgment: |
32 | * "This product includes software developed by the OpenSSL Project |
33 | * for use in the OpenSSL Toolkit (http://www.openssl.org/)" |
34 | * |
35 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
36 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
37 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
38 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
39 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
40 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
41 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
42 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
43 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
44 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
45 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
46 | * OF THE POSSIBILITY OF SUCH DAMAGE. |
47 | * ==================================================================== */ |
48 | |
49 | #include <openssl/base.h> |
50 | |
51 | #include <assert.h> |
52 | #include <string.h> |
53 | |
54 | #include <openssl/mem.h> |
55 | #include <openssl/cpu.h> |
56 | |
57 | #include "internal.h" |
58 | #include "../../internal.h" |
59 | |
60 | |
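// GHASH works in GF(2^128), defined by the polynomial
// x^128 + x^7 + x^2 + x + 1, with the bits of each block in "reflected"
// order: the most significant bit of the first byte is the coefficient of
// x^0. In this encoding, multiplying a field element by x is a one-bit
// right shift, folding the bit that falls off the low end back in as 0xe1
// (the encoding of x^7 + x^2 + x + 1) XORed into the top byte. REDUCE1BIT
// performs exactly this shift-and-reduce step. PACK places a 16-bit
// constant in the top bits of a |size_t| for the |rem_4bit| table below.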
61 | #define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16)) |
62 | #define REDUCE1BIT(V) \ |
63 | do { \ |
64 | if (sizeof(size_t) == 8) { \ |
65 | uint64_t T = UINT64_C(0xe100000000000000) & (0 - ((V).lo & 1)); \ |
66 | (V).lo = ((V).hi << 63) | ((V).lo >> 1); \ |
67 | (V).hi = ((V).hi >> 1) ^ T; \ |
68 | } else { \ |
69 | uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1)); \ |
70 | (V).lo = ((V).hi << 63) | ((V).lo >> 1); \ |
71 | (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32); \ |
72 | } \ |
73 | } while (0) |
74 | |
75 | // kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four |
76 | // bits of a |size_t|. |
77 | static const size_t kSizeTWithoutLower4Bits = (size_t) -16; |
78 | |
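// gcm_init_4bit fills |Htable| so that |Htable[i]| is the product i*H in
// GF(2^128), with the 4-bit index i read in the reflected bit order
// described above: |Htable[8]| is H itself and |Htable[1]| is H*x^3. The
// four power-of-two entries come from repeated REDUCE1BIT (multiplication
// by x); the rest are XOR combinations of them, since addition in the
// field is XOR.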
79 | void gcm_init_4bit(u128 Htable[16], const uint64_t H[2]) { |
80 | u128 V; |
81 | |
82 | Htable[0].hi = 0; |
83 | Htable[0].lo = 0; |
84 | V.hi = H[0]; |
85 | V.lo = H[1]; |
86 | |
87 | Htable[8] = V; |
88 | REDUCE1BIT(V); |
89 | Htable[4] = V; |
90 | REDUCE1BIT(V); |
91 | Htable[2] = V; |
92 | REDUCE1BIT(V); |
93 | Htable[1] = V; |
94 | Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo; |
95 | V = Htable[4]; |
96 | Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo; |
97 | Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo; |
98 | Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo; |
99 | V = Htable[8]; |
100 | Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo; |
101 | Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo; |
102 | Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo; |
103 | Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo; |
104 | Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo; |
105 | Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo; |
106 | Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo; |
107 | |
108 | #if defined(GHASH_ASM) && defined(OPENSSL_ARM) |
109 | for (int j = 0; j < 16; ++j) { |
110 | V = Htable[j]; |
111 | Htable[j].hi = V.lo; |
112 | Htable[j].lo = V.hi; |
113 | } |
114 | #endif |
115 | } |
116 | |
117 | #if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE) |
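// rem_4bit[r] is the precomputed reduction term for a whole-nibble shift:
// when the accumulator is multiplied by x^4 (a 4-bit right shift of its
// encoding), the nibble r that falls off the low end folds back in as
// rem_4bit[r] XORed into the top bits.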
118 | static const size_t rem_4bit[16] = { |
119 | PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), |
120 | PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), |
121 | PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), |
122 | PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)}; |
123 | |
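// gcm_gmult_4bit computes Xi = Xi * H using the 4-bit table. It walks the
// 16 bytes of |Xi| from last to first, two nibbles per byte, multiplying
// the accumulator Z by x^4 (a shift plus a |rem_4bit| reduction) between
// steps and XORing in |Htable[nibble]| at each one.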
124 | void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) { |
125 | u128 Z; |
126 | int cnt = 15; |
127 | size_t rem, nlo, nhi; |
128 | |
129 | nlo = ((const uint8_t *)Xi)[15]; |
130 | nhi = nlo >> 4; |
131 | nlo &= 0xf; |
132 | |
133 | Z.hi = Htable[nlo].hi; |
134 | Z.lo = Htable[nlo].lo; |
135 | |
136 | while (1) { |
137 | rem = (size_t)Z.lo & 0xf; |
138 | Z.lo = (Z.hi << 60) | (Z.lo >> 4); |
139 | Z.hi = (Z.hi >> 4); |
140 | if (sizeof(size_t) == 8) { |
141 | Z.hi ^= rem_4bit[rem]; |
142 | } else { |
143 | Z.hi ^= (uint64_t)rem_4bit[rem] << 32; |
144 | } |
145 | |
146 | Z.hi ^= Htable[nhi].hi; |
147 | Z.lo ^= Htable[nhi].lo; |
148 | |
149 | if (--cnt < 0) { |
150 | break; |
151 | } |
152 | |
153 | nlo = ((const uint8_t *)Xi)[cnt]; |
154 | nhi = nlo >> 4; |
155 | nlo &= 0xf; |
156 | |
157 | rem = (size_t)Z.lo & 0xf; |
158 | Z.lo = (Z.hi << 60) | (Z.lo >> 4); |
159 | Z.hi = (Z.hi >> 4); |
160 | if (sizeof(size_t) == 8) { |
161 | Z.hi ^= rem_4bit[rem]; |
162 | } else { |
163 | Z.hi ^= (uint64_t)rem_4bit[rem] << 32; |
164 | } |
165 | |
166 | Z.hi ^= Htable[nlo].hi; |
167 | Z.lo ^= Htable[nlo].lo; |
168 | } |
169 | |
170 | Xi[0] = CRYPTO_bswap8(Z.hi); |
171 | Xi[1] = CRYPTO_bswap8(Z.lo); |
172 | } |
173 | |
// Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
// details. Compiler-generated code doesn't seem to give any performance
// improvement, at least not on x86[_64]. It's here mostly as a reference
// and as a placeholder for possible future non-trivial optimizations.
179 | void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, |
180 | size_t len) { |
181 | u128 Z; |
182 | int cnt; |
183 | size_t rem, nlo, nhi; |
184 | |
185 | do { |
186 | cnt = 15; |
187 | nlo = ((const uint8_t *)Xi)[15]; |
188 | nlo ^= inp[15]; |
189 | nhi = nlo >> 4; |
190 | nlo &= 0xf; |
191 | |
192 | Z.hi = Htable[nlo].hi; |
193 | Z.lo = Htable[nlo].lo; |
194 | |
195 | while (1) { |
196 | rem = (size_t)Z.lo & 0xf; |
197 | Z.lo = (Z.hi << 60) | (Z.lo >> 4); |
198 | Z.hi = (Z.hi >> 4); |
199 | if (sizeof(size_t) == 8) { |
200 | Z.hi ^= rem_4bit[rem]; |
201 | } else { |
202 | Z.hi ^= (uint64_t)rem_4bit[rem] << 32; |
203 | } |
204 | |
205 | Z.hi ^= Htable[nhi].hi; |
206 | Z.lo ^= Htable[nhi].lo; |
207 | |
208 | if (--cnt < 0) { |
209 | break; |
210 | } |
211 | |
212 | nlo = ((const uint8_t *)Xi)[cnt]; |
213 | nlo ^= inp[cnt]; |
214 | nhi = nlo >> 4; |
215 | nlo &= 0xf; |
216 | |
217 | rem = (size_t)Z.lo & 0xf; |
218 | Z.lo = (Z.hi << 60) | (Z.lo >> 4); |
219 | Z.hi = (Z.hi >> 4); |
220 | if (sizeof(size_t) == 8) { |
221 | Z.hi ^= rem_4bit[rem]; |
222 | } else { |
223 | Z.hi ^= (uint64_t)rem_4bit[rem] << 32; |
224 | } |
225 | |
226 | Z.hi ^= Htable[nlo].hi; |
227 | Z.lo ^= Htable[nlo].lo; |
228 | } |
229 | |
230 | Xi[0] = CRYPTO_bswap8(Z.hi); |
231 | Xi[1] = CRYPTO_bswap8(Z.lo); |
232 | } while (inp += 16, len -= 16); |
233 | } |
234 | #endif // !GHASH_ASM || AARCH64 || PPC64LE |
235 | |
236 | #define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->gcm_key.Htable) |
237 | #define GHASH(ctx, in, len) \ |
238 | gcm_ghash_4bit((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len) |
// GHASH_CHUNK is a "stride" parameter intended to mitigate cache-thrashing
// effects. The idea is to hash data while it is still in the L1 cache after
// the encryption pass.
242 | #define GHASH_CHUNK (3 * 1024) |
243 | |
244 | #if defined(GHASH_ASM_X86_64) || defined(GHASH_ASM_X86) |
245 | void gcm_init_ssse3(u128 Htable[16], const uint64_t Xi[2]) { |
246 | // Run the existing 4-bit version. |
247 | gcm_init_4bit(Htable, Xi); |
248 | |
249 | // First, swap hi and lo. The "4bit" version places hi first. It treats the |
250 | // two fields separately, so the order does not matter, but ghash-ssse3 reads |
251 | // the entire state into one 128-bit register. |
252 | for (int i = 0; i < 16; i++) { |
253 | uint64_t tmp = Htable[i].hi; |
254 | Htable[i].hi = Htable[i].lo; |
255 | Htable[i].lo = tmp; |
256 | } |
257 | |
258 | // Treat |Htable| as a 16x16 byte table and transpose it. Thus, Htable[i] |
259 | // contains the i'th byte of j*H for all j. |
260 | uint8_t *Hbytes = (uint8_t *)Htable; |
261 | for (int i = 0; i < 16; i++) { |
262 | for (int j = 0; j < i; j++) { |
263 | uint8_t tmp = Hbytes[16*i + j]; |
264 | Hbytes[16*i + j] = Hbytes[16*j + i]; |
265 | Hbytes[16*j + i] = tmp; |
266 | } |
267 | } |
268 | } |
269 | #endif // GHASH_ASM_X86_64 || GHASH_ASM_X86 |
270 | |
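// When GCM_FUNCREF_4BIT is defined, GCM_MUL and GHASH dispatch through
// function pointers loaded from the key (the |gcm_gmult_p| and
// |gcm_ghash_p| locals in the functions below) rather than calling the
// 4-bit routines directly.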
271 | #ifdef GCM_FUNCREF_4BIT |
272 | #undef GCM_MUL |
273 | #define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable) |
274 | #undef GHASH |
275 | #define GHASH(ctx, in, len) \ |
276 | (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len) |
277 | #endif // GCM_FUNCREF_4BIT |
278 | |
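// CRYPTO_ghash_init selects the fastest GHASH implementation the CPU
// supports: hardware carry-less multiplication (CLMUL, NEON/PMULL, or
// POWER8 vcrypto) when available, then the constant-time SSSE3 fallback on
// x86, and finally the generic 4-bit table code.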
279 | void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash, |
280 | u128 *out_key, u128 out_table[16], int *out_is_avx, |
281 | const uint8_t gcm_key[16]) { |
282 | *out_is_avx = 0; |
283 | |
284 | union { |
285 | uint64_t u[2]; |
286 | uint8_t c[16]; |
287 | } H; |
288 | |
289 | OPENSSL_memcpy(H.c, gcm_key, 16); |
290 | |
291 | // H is stored in host byte order |
292 | H.u[0] = CRYPTO_bswap8(H.u[0]); |
293 | H.u[1] = CRYPTO_bswap8(H.u[1]); |
294 | |
295 | OPENSSL_memcpy(out_key, H.c, 16); |
296 | |
297 | #if defined(GHASH_ASM_X86_64) |
298 | if (crypto_gcm_clmul_enabled()) { |
299 | if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) { // AVX+MOVBE |
300 | gcm_init_avx(out_table, H.u); |
301 | *out_mult = gcm_gmult_avx; |
302 | *out_hash = gcm_ghash_avx; |
303 | *out_is_avx = 1; |
304 | return; |
305 | } |
306 | gcm_init_clmul(out_table, H.u); |
307 | *out_mult = gcm_gmult_clmul; |
308 | *out_hash = gcm_ghash_clmul; |
309 | return; |
310 | } |
311 | if (gcm_ssse3_capable()) { |
312 | gcm_init_ssse3(out_table, H.u); |
313 | *out_mult = gcm_gmult_ssse3; |
314 | *out_hash = gcm_ghash_ssse3; |
315 | return; |
316 | } |
317 | #elif defined(GHASH_ASM_X86) |
318 | if (crypto_gcm_clmul_enabled()) { |
319 | gcm_init_clmul(out_table, H.u); |
320 | *out_mult = gcm_gmult_clmul; |
321 | *out_hash = gcm_ghash_clmul; |
322 | return; |
323 | } |
324 | if (gcm_ssse3_capable()) { |
325 | gcm_init_ssse3(out_table, H.u); |
326 | *out_mult = gcm_gmult_ssse3; |
327 | *out_hash = gcm_ghash_ssse3; |
328 | return; |
329 | } |
330 | #elif defined(GHASH_ASM_ARM) |
331 | if (gcm_pmull_capable()) { |
332 | gcm_init_v8(out_table, H.u); |
333 | *out_mult = gcm_gmult_v8; |
334 | *out_hash = gcm_ghash_v8; |
335 | return; |
336 | } |
337 | |
338 | if (gcm_neon_capable()) { |
339 | gcm_init_neon(out_table, H.u); |
340 | *out_mult = gcm_gmult_neon; |
341 | *out_hash = gcm_ghash_neon; |
342 | return; |
343 | } |
344 | #elif defined(GHASH_ASM_PPC64LE) |
345 | if (CRYPTO_is_PPC64LE_vcrypto_capable()) { |
346 | gcm_init_p8(out_table, H.u); |
347 | *out_mult = gcm_gmult_p8; |
348 | *out_hash = gcm_ghash_p8; |
349 | return; |
350 | } |
351 | #endif |
352 | |
353 | gcm_init_4bit(out_table, H.u); |
354 | #if defined(GHASH_ASM_X86) |
355 | *out_mult = gcm_gmult_4bit_mmx; |
356 | *out_hash = gcm_ghash_4bit_mmx; |
357 | #else |
358 | *out_mult = gcm_gmult_4bit; |
359 | *out_hash = gcm_ghash_4bit; |
360 | #endif |
361 | } |
362 | |
363 | void CRYPTO_gcm128_init_key(GCM128_KEY *gcm_key, const AES_KEY *aes_key, |
364 | block128_f block, int block_is_hwaes) { |
365 | OPENSSL_memset(gcm_key, 0, sizeof(*gcm_key)); |
366 | gcm_key->block = block; |
367 | |
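  // The hash key is H = E(K, 0^128), the block cipher applied to the
  // all-zero block.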
368 | uint8_t ghash_key[16]; |
369 | OPENSSL_memset(ghash_key, 0, sizeof(ghash_key)); |
370 | (*block)(ghash_key, ghash_key, aes_key); |
371 | |
372 | int is_avx; |
373 | CRYPTO_ghash_init(&gcm_key->gmult, &gcm_key->ghash, &gcm_key->H, |
374 | gcm_key->Htable, &is_avx, ghash_key); |
375 | |
376 | gcm_key->use_aesni_gcm_crypt = (is_avx && block_is_hwaes) ? 1 : 0; |
377 | } |
378 | |
379 | void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const AES_KEY *key, |
380 | const uint8_t *iv, size_t len) { |
381 | #ifdef GCM_FUNCREF_4BIT |
382 | void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = |
383 | ctx->gcm_key.gmult; |
384 | #endif |
385 | |
386 | ctx->Yi.u[0] = 0; |
387 | ctx->Yi.u[1] = 0; |
388 | ctx->Xi.u[0] = 0; |
389 | ctx->Xi.u[1] = 0; |
390 | ctx->len.u[0] = 0; // AAD length |
391 | ctx->len.u[1] = 0; // message length |
392 | ctx->ares = 0; |
393 | ctx->mres = 0; |
394 | |
395 | uint32_t ctr; |
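  // Derive the pre-counter block Y0 as in NIST SP 800-38D: a 96-bit IV is
  // used directly with a 32-bit counter of one appended, while any other
  // length is compressed with GHASH, ending with a block encoding the IV's
  // length in bits.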
396 | if (len == 12) { |
397 | OPENSSL_memcpy(ctx->Yi.c, iv, 12); |
398 | ctx->Yi.c[15] = 1; |
399 | ctr = 1; |
400 | } else { |
401 | uint64_t len0 = len; |
402 | |
403 | while (len >= 16) { |
404 | for (size_t i = 0; i < 16; ++i) { |
405 | ctx->Yi.c[i] ^= iv[i]; |
406 | } |
407 | GCM_MUL(ctx, Yi); |
408 | iv += 16; |
409 | len -= 16; |
410 | } |
411 | if (len) { |
412 | for (size_t i = 0; i < len; ++i) { |
413 | ctx->Yi.c[i] ^= iv[i]; |
414 | } |
415 | GCM_MUL(ctx, Yi); |
416 | } |
417 | len0 <<= 3; |
418 | ctx->Yi.u[1] ^= CRYPTO_bswap8(len0); |
419 | |
420 | GCM_MUL(ctx, Yi); |
421 | ctr = CRYPTO_bswap4(ctx->Yi.d[3]); |
422 | } |
423 | |
424 | (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EK0.c, key); |
425 | ++ctr; |
426 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
427 | } |
428 | |
429 | int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) { |
430 | #ifdef GCM_FUNCREF_4BIT |
431 | void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = |
432 | ctx->gcm_key.gmult; |
433 | #ifdef GHASH |
434 | void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, |
435 | size_t len) = ctx->gcm_key.ghash; |
436 | #endif |
437 | #endif |
438 | |
439 | if (ctx->len.u[1]) { |
440 | return 0; |
441 | } |
442 | |
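  // The AAD length is encoded as a 64-bit count of bits in the final GHASH
  // block, which caps it at 2^61 bytes.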
443 | uint64_t alen = ctx->len.u[0] + len; |
444 | if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) { |
445 | return 0; |
446 | } |
447 | ctx->len.u[0] = alen; |
448 | |
449 | unsigned n = ctx->ares; |
450 | if (n) { |
451 | while (n && len) { |
452 | ctx->Xi.c[n] ^= *(aad++); |
453 | --len; |
454 | n = (n + 1) % 16; |
455 | } |
456 | if (n == 0) { |
457 | GCM_MUL(ctx, Xi); |
458 | } else { |
459 | ctx->ares = n; |
460 | return 1; |
461 | } |
462 | } |
463 | |
464 | // Process a whole number of blocks. |
465 | size_t len_blocks = len & kSizeTWithoutLower4Bits; |
466 | if (len_blocks != 0) { |
467 | GHASH(ctx, aad, len_blocks); |
468 | aad += len_blocks; |
469 | len -= len_blocks; |
470 | } |
471 | |
472 | // Process the remainder. |
473 | if (len != 0) { |
474 | n = (unsigned int)len; |
475 | for (size_t i = 0; i < len; ++i) { |
476 | ctx->Xi.c[i] ^= aad[i]; |
477 | } |
478 | } |
479 | |
480 | ctx->ares = n; |
481 | return 1; |
482 | } |
483 | |
484 | int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key, |
485 | const uint8_t *in, uint8_t *out, size_t len) { |
486 | block128_f block = ctx->gcm_key.block; |
487 | #ifdef GCM_FUNCREF_4BIT |
488 | void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = |
489 | ctx->gcm_key.gmult; |
490 | void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, |
491 | size_t len) = ctx->gcm_key.ghash; |
492 | #endif |
493 | |
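  // NIST SP 800-38D limits the plaintext to 2^39 - 256 bits, i.e.
  // 2^36 - 32 bytes.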
494 | uint64_t mlen = ctx->len.u[1] + len; |
495 | if (mlen > ((UINT64_C(1) << 36) - 32) || |
496 | (sizeof(len) == 8 && mlen < len)) { |
497 | return 0; |
498 | } |
499 | ctx->len.u[1] = mlen; |
500 | |
501 | if (ctx->ares) { |
502 | // First call to encrypt finalizes GHASH(AAD) |
503 | GCM_MUL(ctx, Xi); |
504 | ctx->ares = 0; |
505 | } |
506 | |
507 | unsigned n = ctx->mres; |
508 | if (n) { |
509 | while (n && len) { |
510 | ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; |
511 | --len; |
512 | n = (n + 1) % 16; |
513 | } |
514 | if (n == 0) { |
515 | GCM_MUL(ctx, Xi); |
516 | } else { |
517 | ctx->mres = n; |
518 | return 1; |
519 | } |
520 | } |
521 | |
522 | uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]); |
523 | while (len >= GHASH_CHUNK) { |
524 | size_t j = GHASH_CHUNK; |
525 | |
526 | while (j) { |
527 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
528 | ++ctr; |
529 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
530 | for (size_t i = 0; i < 16; i += sizeof(size_t)) { |
531 | store_word_le(out + i, |
532 | load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]); |
533 | } |
534 | out += 16; |
535 | in += 16; |
536 | j -= 16; |
537 | } |
538 | GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK); |
539 | len -= GHASH_CHUNK; |
540 | } |
541 | size_t len_blocks = len & kSizeTWithoutLower4Bits; |
542 | if (len_blocks != 0) { |
543 | while (len >= 16) { |
544 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
545 | ++ctr; |
546 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
547 | for (size_t i = 0; i < 16; i += sizeof(size_t)) { |
548 | store_word_le(out + i, |
549 | load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]); |
550 | } |
551 | out += 16; |
552 | in += 16; |
553 | len -= 16; |
554 | } |
555 | GHASH(ctx, out - len_blocks, len_blocks); |
556 | } |
557 | if (len) { |
558 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
559 | ++ctr; |
560 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
561 | while (len--) { |
562 | ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; |
563 | ++n; |
564 | } |
565 | } |
566 | |
567 | ctx->mres = n; |
568 | return 1; |
569 | } |
570 | |
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
                          const uint8_t *in, uint8_t *out, size_t len) {
574 | block128_f block = ctx->gcm_key.block; |
575 | #ifdef GCM_FUNCREF_4BIT |
576 | void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = |
577 | ctx->gcm_key.gmult; |
578 | void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, |
579 | size_t len) = ctx->gcm_key.ghash; |
580 | #endif |
581 | |
582 | uint64_t mlen = ctx->len.u[1] + len; |
583 | if (mlen > ((UINT64_C(1) << 36) - 32) || |
584 | (sizeof(len) == 8 && mlen < len)) { |
585 | return 0; |
586 | } |
587 | ctx->len.u[1] = mlen; |
588 | |
589 | if (ctx->ares) { |
590 | // First call to decrypt finalizes GHASH(AAD) |
591 | GCM_MUL(ctx, Xi); |
592 | ctx->ares = 0; |
593 | } |
594 | |
595 | unsigned n = ctx->mres; |
596 | if (n) { |
597 | while (n && len) { |
598 | uint8_t c = *(in++); |
599 | *(out++) = c ^ ctx->EKi.c[n]; |
600 | ctx->Xi.c[n] ^= c; |
601 | --len; |
602 | n = (n + 1) % 16; |
603 | } |
604 | if (n == 0) { |
605 | GCM_MUL(ctx, Xi); |
606 | } else { |
607 | ctx->mres = n; |
608 | return 1; |
609 | } |
610 | } |
611 | |
612 | uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]); |
613 | while (len >= GHASH_CHUNK) { |
614 | size_t j = GHASH_CHUNK; |
615 | |
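    // GHASH runs over the ciphertext, so hash the chunk before the CTR
    // pass overwrites it when decrypting in place. Hashing first also
    // keeps the data in L1 for the decryption pass.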
616 | GHASH(ctx, in, GHASH_CHUNK); |
617 | while (j) { |
618 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
619 | ++ctr; |
620 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
621 | for (size_t i = 0; i < 16; i += sizeof(size_t)) { |
622 | store_word_le(out + i, |
623 | load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]); |
624 | } |
625 | out += 16; |
626 | in += 16; |
627 | j -= 16; |
628 | } |
629 | len -= GHASH_CHUNK; |
630 | } |
631 | size_t len_blocks = len & kSizeTWithoutLower4Bits; |
632 | if (len_blocks != 0) { |
633 | GHASH(ctx, in, len_blocks); |
634 | while (len >= 16) { |
635 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
636 | ++ctr; |
637 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
638 | for (size_t i = 0; i < 16; i += sizeof(size_t)) { |
639 | store_word_le(out + i, |
640 | load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]); |
641 | } |
642 | out += 16; |
643 | in += 16; |
644 | len -= 16; |
645 | } |
646 | } |
647 | if (len) { |
648 | (*block)(ctx->Yi.c, ctx->EKi.c, key); |
649 | ++ctr; |
650 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
651 | while (len--) { |
652 | uint8_t c = in[n]; |
653 | ctx->Xi.c[n] ^= c; |
654 | out[n] = c ^ ctx->EKi.c[n]; |
655 | ++n; |
656 | } |
657 | } |
658 | |
659 | ctx->mres = n; |
660 | return 1; |
661 | } |
662 | |
663 | int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key, |
664 | const uint8_t *in, uint8_t *out, size_t len, |
665 | ctr128_f stream) { |
666 | #ifdef GCM_FUNCREF_4BIT |
667 | void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = |
668 | ctx->gcm_key.gmult; |
669 | void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, |
670 | size_t len) = ctx->gcm_key.ghash; |
671 | #endif |
672 | |
673 | uint64_t mlen = ctx->len.u[1] + len; |
674 | if (mlen > ((UINT64_C(1) << 36) - 32) || |
675 | (sizeof(len) == 8 && mlen < len)) { |
676 | return 0; |
677 | } |
678 | ctx->len.u[1] = mlen; |
679 | |
680 | if (ctx->ares) { |
681 | // First call to encrypt finalizes GHASH(AAD) |
682 | GCM_MUL(ctx, Xi); |
683 | ctx->ares = 0; |
684 | } |
685 | |
686 | unsigned n = ctx->mres; |
687 | if (n) { |
688 | while (n && len) { |
689 | ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; |
690 | --len; |
691 | n = (n + 1) % 16; |
692 | } |
693 | if (n == 0) { |
694 | GCM_MUL(ctx, Xi); |
695 | } else { |
696 | ctx->mres = n; |
697 | return 1; |
698 | } |
699 | } |
700 | |
701 | #if defined(AESNI_GCM) |
702 | if (ctx->gcm_key.use_aesni_gcm_crypt) { |
703 | // |aesni_gcm_encrypt| may not process all the input given to it. It may |
704 | // not process *any* of its input if it is deemed too small. |
705 | size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u); |
706 | in += bulk; |
707 | out += bulk; |
708 | len -= bulk; |
709 | } |
710 | #endif |
711 | |
712 | uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]); |
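  // Each |stream| call consumes one 32-bit counter value per 16-byte
  // block, so advance |ctr| by the number of blocks processed and write it
  // back into |Yi| afterwards.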
713 | while (len >= GHASH_CHUNK) { |
714 | (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); |
715 | ctr += GHASH_CHUNK / 16; |
716 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
717 | GHASH(ctx, out, GHASH_CHUNK); |
718 | out += GHASH_CHUNK; |
719 | in += GHASH_CHUNK; |
720 | len -= GHASH_CHUNK; |
721 | } |
722 | size_t len_blocks = len & kSizeTWithoutLower4Bits; |
723 | if (len_blocks != 0) { |
724 | size_t j = len_blocks / 16; |
725 | |
726 | (*stream)(in, out, j, key, ctx->Yi.c); |
727 | ctr += (unsigned int)j; |
728 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
729 | in += len_blocks; |
730 | len -= len_blocks; |
731 | GHASH(ctx, out, len_blocks); |
732 | out += len_blocks; |
733 | } |
734 | if (len) { |
735 | (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key); |
736 | ++ctr; |
737 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
738 | while (len--) { |
739 | ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; |
740 | ++n; |
741 | } |
742 | } |
743 | |
744 | ctx->mres = n; |
745 | return 1; |
746 | } |
747 | |
748 | int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key, |
749 | const uint8_t *in, uint8_t *out, size_t len, |
750 | ctr128_f stream) { |
751 | #ifdef GCM_FUNCREF_4BIT |
752 | void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = |
753 | ctx->gcm_key.gmult; |
754 | void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, |
755 | size_t len) = ctx->gcm_key.ghash; |
756 | #endif |
757 | |
758 | uint64_t mlen = ctx->len.u[1] + len; |
759 | if (mlen > ((UINT64_C(1) << 36) - 32) || |
760 | (sizeof(len) == 8 && mlen < len)) { |
761 | return 0; |
762 | } |
763 | ctx->len.u[1] = mlen; |
764 | |
765 | if (ctx->ares) { |
766 | // First call to decrypt finalizes GHASH(AAD) |
767 | GCM_MUL(ctx, Xi); |
768 | ctx->ares = 0; |
769 | } |
770 | |
771 | unsigned n = ctx->mres; |
772 | if (n) { |
773 | while (n && len) { |
774 | uint8_t c = *(in++); |
775 | *(out++) = c ^ ctx->EKi.c[n]; |
776 | ctx->Xi.c[n] ^= c; |
777 | --len; |
778 | n = (n + 1) % 16; |
779 | } |
780 | if (n == 0) { |
781 | GCM_MUL(ctx, Xi); |
782 | } else { |
783 | ctx->mres = n; |
784 | return 1; |
785 | } |
786 | } |
787 | |
788 | #if defined(AESNI_GCM) |
789 | if (ctx->gcm_key.use_aesni_gcm_crypt) { |
790 | // |aesni_gcm_decrypt| may not process all the input given to it. It may |
791 | // not process *any* of its input if it is deemed too small. |
792 | size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u); |
793 | in += bulk; |
794 | out += bulk; |
795 | len -= bulk; |
796 | } |
797 | #endif |
798 | |
799 | uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]); |
800 | while (len >= GHASH_CHUNK) { |
801 | GHASH(ctx, in, GHASH_CHUNK); |
802 | (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); |
803 | ctr += GHASH_CHUNK / 16; |
804 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
805 | out += GHASH_CHUNK; |
806 | in += GHASH_CHUNK; |
807 | len -= GHASH_CHUNK; |
808 | } |
809 | size_t len_blocks = len & kSizeTWithoutLower4Bits; |
810 | if (len_blocks != 0) { |
811 | size_t j = len_blocks / 16; |
812 | |
813 | GHASH(ctx, in, len_blocks); |
814 | (*stream)(in, out, j, key, ctx->Yi.c); |
815 | ctr += (unsigned int)j; |
816 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
817 | out += len_blocks; |
818 | in += len_blocks; |
819 | len -= len_blocks; |
820 | } |
821 | if (len) { |
822 | (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key); |
823 | ++ctr; |
824 | ctx->Yi.d[3] = CRYPTO_bswap4(ctr); |
825 | while (len--) { |
826 | uint8_t c = in[n]; |
827 | ctx->Xi.c[n] ^= c; |
828 | out[n] = c ^ ctx->EKi.c[n]; |
829 | ++n; |
830 | } |
831 | } |
832 | |
833 | ctx->mres = n; |
834 | return 1; |
835 | } |
836 | |
837 | int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) { |
838 | #ifdef GCM_FUNCREF_4BIT |
839 | void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = |
840 | ctx->gcm_key.gmult; |
841 | #endif |
842 | |
843 | if (ctx->mres || ctx->ares) { |
844 | GCM_MUL(ctx, Xi); |
845 | } |
846 | |
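  // Fold in the final GHASH block, the bit lengths of the AAD and the
  // ciphertext, then mask the digest with EK0 = E(K, Y0) to form the tag.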
847 | ctx->Xi.u[0] ^= CRYPTO_bswap8(ctx->len.u[0] << 3); |
848 | ctx->Xi.u[1] ^= CRYPTO_bswap8(ctx->len.u[1] << 3); |
849 | GCM_MUL(ctx, Xi); |
850 | |
851 | ctx->Xi.u[0] ^= ctx->EK0.u[0]; |
852 | ctx->Xi.u[1] ^= ctx->EK0.u[1]; |
853 | |
854 | if (tag && len <= sizeof(ctx->Xi)) { |
855 | return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0; |
856 | } else { |
857 | return 0; |
858 | } |
859 | } |
860 | |
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, uint8_t *tag, size_t len) {
862 | CRYPTO_gcm128_finish(ctx, NULL, 0); |
863 | OPENSSL_memcpy(tag, ctx->Xi.c, |
864 | len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c)); |
865 | } |
866 | |
867 | #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) |
868 | int crypto_gcm_clmul_enabled(void) { |
869 | #ifdef GHASH_ASM |
870 | const uint32_t *ia32cap = OPENSSL_ia32cap_get(); |
871 | return (ia32cap[0] & (1 << 24)) && // check FXSR bit |
872 | (ia32cap[1] & (1 << 1)); // check PCLMULQDQ bit |
873 | #else |
874 | return 0; |
875 | #endif |
876 | } |
877 | #endif |
878 | |