poly1305_arm.c source code [engine/third_party/boringssl/src/crypto/poly1305/poly1305_arm.c]

/* Copyright (c) 2014, Google Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
// This implementation was taken from the public domain, neon2 version in
// SUPERCOP by D. J. Bernstein and Peter Schwabe.
17
18	#include <openssl/poly1305.h>
19
20	#include <string.h>
21
22	#include "../internal.h"
23	#include "internal.h"
24
25
26	#if defined(OPENSSL_POLY1305_NEON)
27
// fe1305x2 stores two values side by side as 26-bit limbs. The code in this
// file keeps the first value's five limbs at the even indices (0, 2, 4, 6, 8)
// and the second value's five limbs at the odd indices (1, 3, 5, 7, 9).
typedef struct {
  uint32_t v[12];  // for alignment; only using 10
} fe1305x2;
31
32	#define addmulmod openssl_poly1305_neon2_addmulmod
33	#define blocks openssl_poly1305_neon2_blocks
34
35	extern void addmulmod(fe1305x2 r, const* fe1305x2 x, const* fe1305x2 *y,
36	const fe1305x2 *c);
37
38	extern int blocks(fe1305x2 h, const* fe1305x2 precomp, const* uint8_t *in,
39	unsigned int inlen);
40
41	static void freeze(fe1305x2 *r) {
42	int i;
43
44	uint32_t x0 = r->v[`0`];
45	uint32_t x1 = r->v[`2`];
46	uint32_t x2 = r->v[`4`];
47	uint32_t x3 = r->v[`6`];
48	uint32_t x4 = r->v[`8`];
49	uint32_t y0;
50	uint32_t y1;
51	uint32_t y2;
52	uint32_t y3;
53	uint32_t y4;
54	uint32_t swap;
55
56	for (i = `0`; i < `3`; ++i) {
57	x1 += x0 >> `26`;
58	x0 &= `0x3ffffff`;
59	x2 += x1 >> `26`;
60	x1 &= `0x3ffffff`;
61	x3 += x2 >> `26`;
62	x2 &= `0x3ffffff`;
63	x4 += x3 >> `26`;
64	x3 &= `0x3ffffff`;
65	x0 += `5` * (x4 >> `26`);
66	x4 &= `0x3ffffff`;
67	}
68
69	y0 = x0 + `5`;
70	y1 = x1 + (y0 >> `26`);
71	y0 &= `0x3ffffff`;
72	y2 = x2 + (y1 >> `26`);
73	y1 &= `0x3ffffff`;
74	y3 = x3 + (y2 >> `26`);
75	y2 &= `0x3ffffff`;
76	y4 = x4 + (y3 >> `26`);
77	y3 &= `0x3ffffff`;
78	swap = -(y4 >> `26`);
79	y4 &= `0x3ffffff`;
80
81	y0 ^= x0;
82	y1 ^= x1;
83	y2 ^= x2;
84	y3 ^= x3;
85	y4 ^= x4;
86
87	y0 &= swap;
88	y1 &= swap;
89	y2 &= swap;
90	y3 &= swap;
91	y4 &= swap;
92
93	y0 ^= x0;
94	y1 ^= x1;
95	y2 ^= x2;
96	y3 ^= x3;
97	y4 ^= x4;
98
99	r->v[`0`] = y0;
100	r->v[`2`] = y1;
101	r->v[`4`] = y2;
102	r->v[`6`] = y3;
103	r->v[`8`] = y4;
104	}
105
106	static void fe1305x2_tobytearray(uint8_t r, fe1305x2 x) {
107	uint32_t x0 = x->v[`0`];
108	uint32_t x1 = x->v[`2`];
109	uint32_t x2 = x->v[`4`];
110	uint32_t x3 = x->v[`6`];
111	uint32_t x4 = x->v[`8`];
112
113	x1 += x0 >> `26`;
114	x0 &= `0x3ffffff`;
115	x2 += x1 >> `26`;
116	x1 &= `0x3ffffff`;
117	x3 += x2 >> `26`;
118	x2 &= `0x3ffffff`;
119	x4 += x3 >> `26`;
120	x3 &= `0x3ffffff`;
121
122	(uint32_t )r = x0 + (x1 << `26`);
123	(uint32_t )(r + `4`) = (x1 >> `6`) + (x2 << `20`);
124	(uint32_t )(r + `8`) = (x2 >> `12`) + (x3 << `14`);
125	(uint32_t )(r + `12`) = (x3 >> `18`) + (x4 << `8`);
126	}
127
// load32 exists to avoid breaking strict aliasing rules in
// fe1305x2_frombytearray. It copies four bytes starting at |t| into a
// uint32_t (native byte order) and returns it; |t| needs no particular
// alignment and is only read.
static uint32_t load32(const uint8_t *t) {
  uint32_t tmp;
  OPENSSL_memcpy(&tmp, t, sizeof(tmp));
  return tmp;
}
135
136	static void fe1305x2_frombytearray(fe1305x2 r, const* uint8_t *x,
137	unsigned long long xlen) {
138	unsigned i;
139	uint8_t t[`17`];
140
141	for (i = `0`; (i < `16`) && (i < xlen); i++) {
142	t[i] = x[i];
143	}
144	xlen -= i;
145	x += i;
146	t[i++] = `1`;
147	for (; i < `17`; i++) {
148	t[i] = `0`;
149	}
150
151	r->v[`0`] = `0x3ffffff` & load32(t);
152	r->v[`2`] = `0x3ffffff` & (load32(t + `3`) >> `2`);
153	r->v[`4`] = `0x3ffffff` & (load32(t + `6`) >> `4`);
154	r->v[`6`] = `0x3ffffff` & (load32(t + `9`) >> `6`);
155	r->v[`8`] = load32(t + `13`);
156
157	if (xlen) {
158	for (i = `0`; (i < `16`) && (i < xlen); i++) {
159	t[i] = x[i];
160	}
161	t[i++] = `1`;
162	for (; i < `17`; i++) {
163	t[i] = `0`;
164	}
165
166	r->v[`1`] = `0x3ffffff` & load32(t);
167	r->v[`3`] = `0x3ffffff` & (load32(t + `3`) >> `2`);
168	r->v[`5`] = `0x3ffffff` & (load32(t + `6`) >> `4`);
169	r->v[`7`] = `0x3ffffff` & (load32(t + `9`) >> `6`);
170	r->v[`9`] = load32(t + `13`);
171	} else {
172	r->v[`1`] = r->v[`3`] = r->v[`5`] = r->v[`7`] = r->v[`9`] = `0`;
173	}
174	}
175
176	static const alignas(`16`) fe1305x2 zero;
177
178	struct poly1305_state_st {
179	uint8_t data[sizeof(fe1305x2[`5`]) + `128`];
180	uint8_t buf[`32`];
181	unsigned int buf_used;
182	uint8_t key[`16`];
183	};
184
185	void CRYPTO_poly1305_init_neon(poly1305_state state, const* uint8_t key[`32`]) {
186	struct poly1305_state_st st = (struct* poly1305_state_st *)(state);
187	fe1305x2 *const r = (fe1305x2 )(st->data + (`15` & (-(int*)st->data)));
188	fe1305x2 *const h = r + `1`;
189	fe1305x2 *const c = h + `1`;
190	fe1305x2 *const precomp = c + `1`;
191	unsigned int j;
192
193	r->v[`1`] = r->v[`0`] = `0x3ffffff` & (uint32_t )key;
194	r->v[`3`] = r->v[`2`] = `0x3ffff03` & (((uint32_t )(key + `3`)) >> `2`);
195	r->v[`5`] = r->v[`4`] = `0x3ffc0ff` & (((uint32_t )(key + `6`)) >> `4`);
196	r->v[`7`] = r->v[`6`] = `0x3f03fff` & (((uint32_t )(key + `9`)) >> `6`);
197	r->v[`9`] = r->v[`8`] = `0x00fffff` & (((uint32_t )(key + `12`)) >> `8`);
198
199	for (j = `0`; j < `10`; j++) {
200	h->v[j] = `0`; // XXX: should fast-forward a bit
201	}
202
203	addmulmod(precomp, r, r, &zero); // precompute r^2
204	addmulmod(precomp + `1`, precomp, precomp, &zero); // precompute r^4
205
206	OPENSSL_memcpy(st->key, key + `16`, `16`);
207	st->buf_used = `0`;
208	}
209
210	void CRYPTO_poly1305_update_neon(poly1305_state state, const* uint8_t *in,
211	size_t in_len) {
212	struct poly1305_state_st st = (struct* poly1305_state_st *)(state);
213	fe1305x2 *const r = (fe1305x2 )(st->data + (`15` & (-(int*)st->data)));
214	fe1305x2 *const h = r + `1`;
215	fe1305x2 *const c = h + `1`;
216	fe1305x2 *const precomp = c + `1`;
217	unsigned int i;
218
219	if (st->buf_used) {
220	unsigned int todo = `32` - st->buf_used;
221	if (todo > in_len) {
222	todo = in_len;
223	}
224	for (i = `0`; i < todo; i++) {
225	st->buf[st->buf_used + i] = in[i];
226	}
227	st->buf_used += todo;
228	in_len -= todo;
229	in += todo;
230
231	if (st->buf_used == sizeof(st->buf) && in_len) {
232	addmulmod(h, h, precomp, &zero);
233	fe1305x2_frombytearray(c, st->buf, sizeof(st->buf));
234	for (i = `0`; i < `10`; i++) {
235	h->v[i] += c->v[i];
236	}
237	st->buf_used = `0`;
238	}
239	}
240
241	while (in_len > `32`) {
242	unsigned int tlen = `1048576`;
243	if (in_len < tlen) {
244	tlen = in_len;
245	}
246	tlen -= blocks(h, precomp, in, tlen);
247	in_len -= tlen;
248	in += tlen;
249	}
250
251	if (in_len) {
252	for (i = `0`; i < in_len; i++) {
253	st->buf[i] = in[i];
254	}
255	st->buf_used = in_len;
256	}
257	}
258
259	void CRYPTO_poly1305_finish_neon(poly1305_state *state, uint8_t mac[`16`]) {
260	struct poly1305_state_st st = (struct* poly1305_state_st *)(state);
261	fe1305x2 *const r = (fe1305x2 )(st->data + (`15` & (-(int*)st->data)));
262	fe1305x2 *const h = r + `1`;
263	fe1305x2 *const c = h + `1`;
264	fe1305x2 *const precomp = c + `1`;
265
266	addmulmod(h, h, precomp, &zero);
267
268	if (st->buf_used > `16`) {
269	fe1305x2_frombytearray(c, st->buf, st->buf_used);
270	precomp->v[`1`] = r->v[`1`];
271	precomp->v[`3`] = r->v[`3`];
272	precomp->v[`5`] = r->v[`5`];
273	precomp->v[`7`] = r->v[`7`];
274	precomp->v[`9`] = r->v[`9`];
275	addmulmod(h, h, precomp, c);
276	} else if (st->buf_used > `0`) {
277	fe1305x2_frombytearray(c, st->buf, st->buf_used);
278	r->v[`1`] = `1`;
279	r->v[`3`] = `0`;
280	r->v[`5`] = `0`;
281	r->v[`7`] = `0`;
282	r->v[`9`] = `0`;
283	addmulmod(h, h, r, c);
284	}
285
286	h->v[`0`] += h->v[`1`];
287	h->v[`2`] += h->v[`3`];
288	h->v[`4`] += h->v[`5`];
289	h->v[`6`] += h->v[`7`];
290	h->v[`8`] += h->v[`9`];
291	freeze(h);
292
293	fe1305x2_frombytearray(c, st->key, `16`);
294	c->v[`8`] ^= (`1` << `24`);
295
296	h->v[`0`] += c->v[`0`];
297	h->v[`2`] += c->v[`2`];
298	h->v[`4`] += c->v[`4`];
299	h->v[`6`] += c->v[`6`];
300	h->v[`8`] += c->v[`8`];
301	fe1305x2_tobytearray(mac, h);
302	}
303
304	#endif // OPENSSL_POLY1305_NEON
305

Browse the source code of engine/third_party/boringssl/src/crypto/poly1305/poly1305_arm.c