x86_64-gcc.c source code [engine/third_party/boringssl/src/crypto/fipsmodule/bn/asm/x86_64-gcc.c]

/* x86_64 BIGNUM accelerator version 0.1, December 2002.
2	*
3	* Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4	* project.
5	*
6	* Rights for redistribution and usage in source and binary forms are
7	* granted according to the OpenSSL license. Warranty of any kind is
8	* disclaimed.
9	*
10	* Q. Version 0.1? It doesn't sound like Andy, he used to assign real
11	* versions, like 1.0...
12	* A. Well, that's because this code is basically a quick-n-dirty
13	* proof-of-concept hack. As you can see it's implemented with
14	* inline assembler, which means that you're bound to GCC and that
15	* there might be enough room for further improvement.
16	*
17	* Q. Why inline assembler?
18	* A. x86_64 features own ABI which I'm not familiar with. This is
19	* why I decided to let the compiler take care of subroutine
20	* prologue/epilogue as well as register allocation. For reference.
21	* Win64 implements different ABI for AMD64, different from Linux.
22	*
23	* Q. How much faster does it get?
24	* A. 'apps/openssl speed rsa dsa' output with no-asm:
25	*
26	* sign verify sign/s verify/s
27	* rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2
28	* rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0
29	* rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8
30	* rsa 4096 bits 0.1155s 0.0018s 8.7 555.6
31	* sign verify sign/s verify/s
32	* dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3
33	* dsa 1024 bits 0.0014s 0.0018s 692.3 559.2
34	* dsa 2048 bits 0.0049s 0.0061s 204.7 165.0
35	*
36	* 'apps/openssl speed rsa dsa' output with this module:
37	*
38	* sign verify sign/s verify/s
39	* rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9
40	* rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7
41	* rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0
42	* rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8
43	* sign verify sign/s verify/s
44	* dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3
45	* dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4
46	* dsa 2048 bits 0.0016s 0.0020s 620.4 504.6
47	*
48	* For the reference. IA-32 assembler implementation performs
49	* very much like 64-bit code compiled with no-asm on the same
50	* machine.
51	*/
52
53	#include <openssl/bn.h>
54
55	// TODO(davidben): Get this file working on MSVC x64.
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
    (defined(__GNUC__) || defined(__clang__))
58
59	#include "../internal.h"
60
61
#undef mul
#undef mul_add

// mul_add(r, a, word, carry) computes the 128-bit quantity
//
//   r + a * word + carry
//
// storing its low 64 bits back into |r| and its high 64 bits into |carry|.
// The sum cannot exceed 128 bits: (2^64-1)^2 + 2*(2^64-1) == 2^128 - 1.
//
// |r| and |a| must be lvalues that can be addressed in memory (they are bound
// with "m" constraints), and |carry| must be a modifiable BN_ULONG lvalue.
//
// Constraint notes from the original author:
// "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
// "g"(0) lets the compiler decide where it wants to keep the value of zero.
#define mul_add(r, a, word, carry)                                         \
  do {                                                                     \
    register BN_ULONG high, low;                                           \
    /* high:low = a * word */                                              \
    __asm__("mulq %3" : "=a"(low), "=d"(high) : "a"(word), "m"(a) : "cc"); \
    /* carry += low, propagating the carry bit into high */                \
    __asm__("addq %2,%0; adcq %3,%1"                                       \
            : "+r"(carry), "+d"(high)                                      \
            : "a"(low), "g"(0)                                             \
            : "cc");                                                       \
    /* r += carry, propagating the carry bit into high */                  \
    __asm__("addq %2,%0; adcq %3,%1"                                       \
            : "+m"(r), "+d"(high)                                          \
            : "r"(carry), "g"(0)                                           \
            : "cc");                                                       \
    (carry) = high;                                                        \
  } while (0)
82
// mul(r, a, word, carry) computes the 128-bit quantity
//
//   a * word + carry
//
// storing its low 64 bits into |r| and its high 64 bits into |carry|.
// |carry| must be a modifiable BN_ULONG lvalue.
//
// |a| is bound with "rm" rather than "g": "g" would also permit an immediate
// operand, which the one-operand mulq instruction cannot encode.
#define mul(r, a, word, carry)                                              \
  do {                                                                      \
    register BN_ULONG high, low;                                            \
    /* high:low = a * word */                                               \
    __asm__("mulq %3" : "=a"(low), "=d"(high) : "a"(word), "rm"(a) : "cc"); \
    /* carry += low, propagating the carry bit into high */                 \
    __asm__("addq %2,%0; adcq %3,%1"                                        \
            : "+r"(carry), "+d"(high)                                       \
            : "a"(low), "g"(0)                                              \
            : "cc");                                                        \
    (r) = (carry);                                                          \
    (carry) = high;                                                         \
  } while (0)
#undef sqr
// sqr(r0, r1, a) computes the 128-bit square of |a|, storing the low 64 bits
// in |r0| and the high 64 bits in |r1|.
//
// The expansion deliberately has no trailing semicolon so that `sqr(...);`
// is exactly one statement and can be used as the body of an if/else.
#define sqr(r0, r1, a) \
  __asm__("mulq %2" : "=a"(r0), "=d"(r1) : "a"(a) : "cc")
96
97	BN_ULONG bn_mul_add_words(BN_ULONG rp, const* BN_ULONG *ap, size_t num,
98	BN_ULONG w) {
99	BN_ULONG c1 = `0`;
100
101	if (num == `0`) {
102	return (c1);
103	}
104
105	while (num & ~`3`) {
106	mul_add(rp[`0`], ap[`0`], w, c1);
107	mul_add(rp[`1`], ap[`1`], w, c1);
108	mul_add(rp[`2`], ap[`2`], w, c1);
109	mul_add(rp[`3`], ap[`3`], w, c1);
110	ap += `4`;
111	rp += `4`;
112	num -= `4`;
113	}
114	if (num) {
115	mul_add(rp[`0`], ap[`0`], w, c1);
116	if (--num == `0`) {
117	return c1;
118	}
119	mul_add(rp[`1`], ap[`1`], w, c1);
120	if (--num == `0`) {
121	return c1;
122	}
123	mul_add(rp[`2`], ap[`2`], w, c1);
124	return c1;
125	}
126
127	return c1;
128	}
129
130	BN_ULONG bn_mul_words(BN_ULONG rp, const* BN_ULONG *ap, size_t num,
131	BN_ULONG w) {
132	BN_ULONG c1 = `0`;
133
134	if (num == `0`) {
135	return c1;
136	}
137
138	while (num & ~`3`) {
139	mul(rp[`0`], ap[`0`], w, c1);
140	mul(rp[`1`], ap[`1`], w, c1);
141	mul(rp[`2`], ap[`2`], w, c1);
142	mul(rp[`3`], ap[`3`], w, c1);
143	ap += `4`;
144	rp += `4`;
145	num -= `4`;
146	}
147	if (num) {
148	mul(rp[`0`], ap[`0`], w, c1);
149	if (--num == `0`) {
150	return c1;
151	}
152	mul(rp[`1`], ap[`1`], w, c1);
153	if (--num == `0`) {
154	return c1;
155	}
156	mul(rp[`2`], ap[`2`], w, c1);
157	}
158	return c1;
159	}
160
161	void bn_sqr_words(BN_ULONG r, const* BN_ULONG *a, size_t n) {
162	if (n == `0`) {
163	return;
164	}
165
166	while (n & ~`3`) {
167	sqr(r[`0`], r[`1`], a[`0`]);
168	sqr(r[`2`], r[`3`], a[`1`]);
169	sqr(r[`4`], r[`5`], a[`2`]);
170	sqr(r[`6`], r[`7`], a[`3`]);
171	a += `4`;
172	r += `8`;
173	n -= `4`;
174	}
175	if (n) {
176	sqr(r[`0`], r[`1`], a[`0`]);
177	if (--n == `0`) {
178	return;
179	}
180	sqr(r[`2`], r[`3`], a[`1`]);
181	if (--n == `0`) {
182	return;
183	}
184	sqr(r[`4`], r[`5`], a[`2`]);
185	}
186	}
187
188	BN_ULONG bn_add_words(BN_ULONG rp, const* BN_ULONG ap, const* BN_ULONG *bp,
189	size_t n) {
190	BN_ULONG ret;
191	size_t i = `0`;
192
193	if (n == `0`) {
194	return `0`;
195	}
196
197	__asm__ volatile (
198	" subq %0,%0 \n" // clear carry
199	" jmp 1f \n"
200	".p2align 4 \n"
201	"1:"
202	" movq (%4,%2,8),%0 \n"
203	" adcq (%5,%2,8),%0 \n"
204	" movq %0,(%3,%2,8) \n"
205	" lea 1(%2),%2 \n"
206	" dec %1 \n"
207	" jnz 1b \n"
208	" sbbq %0,%0 \n"
209	: "=&r"(ret), "+c"(n), "+r"(i)
210	: "r"(rp), "r"(ap), "r"(bp)
211	: "cc", "memory");
212
213	return ret & `1`;
214	}
215
216	BN_ULONG bn_sub_words(BN_ULONG rp, const* BN_ULONG ap, const* BN_ULONG *bp,
217	size_t n) {
218	BN_ULONG ret;
219	size_t i = `0`;
220
221	if (n == `0`) {
222	return `0`;
223	}
224
225	__asm__ volatile (
226	" subq %0,%0 \n" // clear borrow
227	" jmp 1f \n"
228	".p2align 4 \n"
229	"1:"
230	" movq (%4,%2,8),%0 \n"
231	" sbbq (%5,%2,8),%0 \n"
232	" movq %0,(%3,%2,8) \n"
233	" lea 1(%2),%2 \n"
234	" dec %1 \n"
235	" jnz 1b \n"
236	" sbbq %0,%0 \n"
237	: "=&r"(ret), "+c"(n), "+r"(i)
238	: "r"(rp), "r"(ap), "r"(bp)
239	: "cc", "memory");
240
241	return ret & `1`;
242	}
243
// mul_add_c(a,b,c0,c1,c2)      -- c+=a*b for three word number c=(c2,c1,c0)
// mul_add_c2(a,b,c0,c1,c2)     -- c+=2*a*b for three word number c=(c2,c1,c0)
// sqr_add_c(a,i,c0,c1,c2)      -- c+=a[i]^2 for three word number c=(c2,c1,c0)
// sqr_add_c2(a,i,j,c0,c1,c2)   -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
248
249	// Keep in mind that carrying into high part of multiplication result can not
250	// overflow, because it cannot be all-ones.
// mul_add_c(a, b, c0, c1, c2) adds the 128-bit product a * b into the
// three-word accumulator (c2, c1, c0), where c0 is the least significant
// word. |b| must be an lvalue addressable in memory (it is bound with "m");
// |c0|, |c1| and |c2| must be modifiable BN_ULONG lvalues.
#define mul_add_c(a, b, c0, c1, c2)                                  \
  do {                                                               \
    BN_ULONG t1, t2;                                                 \
    /* t2:t1 = a * b */                                              \
    __asm__("mulq %3" : "=a"(t1), "=d"(t2) : "a"(a), "m"(b) : "cc"); \
    /* (c2, c1, c0) += t2:t1 */                                      \
    __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2"                     \
            : "+r"(c0), "+r"(c1), "+r"(c2)                           \
            : "r"(t1), "r"(t2), "g"(0)                               \
            : "cc");                                                 \
  } while (0)
260
// sqr_add_c(a, i, c0, c1, c2) adds the 128-bit square of a[i] into the
// three-word accumulator (c2, c1, c0), where c0 is the least significant
// word. |c0|, |c1| and |c2| must be modifiable BN_ULONG lvalues.
#define sqr_add_c(a, i, c0, c1, c2)                               \
  do {                                                            \
    BN_ULONG t1, t2;                                              \
    /* t2:t1 = a[i] * a[i] */                                     \
    __asm__("mulq %2" : "=a"(t1), "=d"(t2) : "a"((a)[i]) : "cc"); \
    /* (c2, c1, c0) += t2:t1 */                                   \
    __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2"                  \
            : "+r"(c0), "+r"(c1), "+r"(c2)                        \
            : "r"(t1), "r"(t2), "g"(0)                            \
            : "cc");                                              \
  } while (0)
270
// mul_add_c2(a, b, c0, c1, c2) adds twice the 128-bit product a * b into the
// three-word accumulator (c2, c1, c0), where c0 is the least significant
// word. The product is computed once and then added two times, rather than
// being doubled first, so no bit of 2 * a * b is lost. |b| must be an lvalue
// addressable in memory (it is bound with "m"); |c0|, |c1| and |c2| must be
// modifiable BN_ULONG lvalues.
#define mul_add_c2(a, b, c0, c1, c2)                                 \
  do {                                                               \
    BN_ULONG t1, t2;                                                 \
    /* t2:t1 = a * b */                                              \
    __asm__("mulq %3" : "=a"(t1), "=d"(t2) : "a"(a), "m"(b) : "cc"); \
    /* (c2, c1, c0) += t2:t1, first time */                          \
    __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2"                     \
            : "+r"(c0), "+r"(c1), "+r"(c2)                           \
            : "r"(t1), "r"(t2), "g"(0)                               \
            : "cc");                                                 \
    /* (c2, c1, c0) += t2:t1, second time */                         \
    __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2"                     \
            : "+r"(c0), "+r"(c1), "+r"(c2)                           \
            : "r"(t1), "r"(t2), "g"(0)                               \
            : "cc");                                                 \
  } while (0)

// sqr_add_c2(a, i, j, c0, c1, c2) adds 2 * a[i] * a[j] into the three-word
// accumulator (c2, c1, c0).
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
286
287	void bn_mul_comba8(BN_ULONG r[`16`], const BN_ULONG a[`8`], const BN_ULONG b[`8`]) {
288	BN_ULONG c1, c2, c3;
289
290	c1 = `0`;
291	c2 = `0`;
292	c3 = `0`;
293	mul_add_c(a[`0`], b[`0`], c1, c2, c3);
294	r[`0`] = c1;
295	c1 = `0`;
296	mul_add_c(a[`0`], b[`1`], c2, c3, c1);
297	mul_add_c(a[`1`], b[`0`], c2, c3, c1);
298	r[`1`] = c2;
299	c2 = `0`;
300	mul_add_c(a[`2`], b[`0`], c3, c1, c2);
301	mul_add_c(a[`1`], b[`1`], c3, c1, c2);
302	mul_add_c(a[`0`], b[`2`], c3, c1, c2);
303	r[`2`] = c3;
304	c3 = `0`;
305	mul_add_c(a[`0`], b[`3`], c1, c2, c3);
306	mul_add_c(a[`1`], b[`2`], c1, c2, c3);
307	mul_add_c(a[`2`], b[`1`], c1, c2, c3);
308	mul_add_c(a[`3`], b[`0`], c1, c2, c3);
309	r[`3`] = c1;
310	c1 = `0`;
311	mul_add_c(a[`4`], b[`0`], c2, c3, c1);
312	mul_add_c(a[`3`], b[`1`], c2, c3, c1);
313	mul_add_c(a[`2`], b[`2`], c2, c3, c1);
314	mul_add_c(a[`1`], b[`3`], c2, c3, c1);
315	mul_add_c(a[`0`], b[`4`], c2, c3, c1);
316	r[`4`] = c2;
317	c2 = `0`;
318	mul_add_c(a[`0`], b[`5`], c3, c1, c2);
319	mul_add_c(a[`1`], b[`4`], c3, c1, c2);
320	mul_add_c(a[`2`], b[`3`], c3, c1, c2);
321	mul_add_c(a[`3`], b[`2`], c3, c1, c2);
322	mul_add_c(a[`4`], b[`1`], c3, c1, c2);
323	mul_add_c(a[`5`], b[`0`], c3, c1, c2);
324	r[`5`] = c3;
325	c3 = `0`;
326	mul_add_c(a[`6`], b[`0`], c1, c2, c3);
327	mul_add_c(a[`5`], b[`1`], c1, c2, c3);
328	mul_add_c(a[`4`], b[`2`], c1, c2, c3);
329	mul_add_c(a[`3`], b[`3`], c1, c2, c3);
330	mul_add_c(a[`2`], b[`4`], c1, c2, c3);
331	mul_add_c(a[`1`], b[`5`], c1, c2, c3);
332	mul_add_c(a[`0`], b[`6`], c1, c2, c3);
333	r[`6`] = c1;
334	c1 = `0`;
335	mul_add_c(a[`0`], b[`7`], c2, c3, c1);
336	mul_add_c(a[`1`], b[`6`], c2, c3, c1);
337	mul_add_c(a[`2`], b[`5`], c2, c3, c1);
338	mul_add_c(a[`3`], b[`4`], c2, c3, c1);
339	mul_add_c(a[`4`], b[`3`], c2, c3, c1);
340	mul_add_c(a[`5`], b[`2`], c2, c3, c1);
341	mul_add_c(a[`6`], b[`1`], c2, c3, c1);
342	mul_add_c(a[`7`], b[`0`], c2, c3, c1);
343	r[`7`] = c2;
344	c2 = `0`;
345	mul_add_c(a[`7`], b[`1`], c3, c1, c2);
346	mul_add_c(a[`6`], b[`2`], c3, c1, c2);
347	mul_add_c(a[`5`], b[`3`], c3, c1, c2);
348	mul_add_c(a[`4`], b[`4`], c3, c1, c2);
349	mul_add_c(a[`3`], b[`5`], c3, c1, c2);
350	mul_add_c(a[`2`], b[`6`], c3, c1, c2);
351	mul_add_c(a[`1`], b[`7`], c3, c1, c2);
352	r[`8`] = c3;
353	c3 = `0`;
354	mul_add_c(a[`2`], b[`7`], c1, c2, c3);
355	mul_add_c(a[`3`], b[`6`], c1, c2, c3);
356	mul_add_c(a[`4`], b[`5`], c1, c2, c3);
357	mul_add_c(a[`5`], b[`4`], c1, c2, c3);
358	mul_add_c(a[`6`], b[`3`], c1, c2, c3);
359	mul_add_c(a[`7`], b[`2`], c1, c2, c3);
360	r[`9`] = c1;
361	c1 = `0`;
362	mul_add_c(a[`7`], b[`3`], c2, c3, c1);
363	mul_add_c(a[`6`], b[`4`], c2, c3, c1);
364	mul_add_c(a[`5`], b[`5`], c2, c3, c1);
365	mul_add_c(a[`4`], b[`6`], c2, c3, c1);
366	mul_add_c(a[`3`], b[`7`], c2, c3, c1);
367	r[`10`] = c2;
368	c2 = `0`;
369	mul_add_c(a[`4`], b[`7`], c3, c1, c2);
370	mul_add_c(a[`5`], b[`6`], c3, c1, c2);
371	mul_add_c(a[`6`], b[`5`], c3, c1, c2);
372	mul_add_c(a[`7`], b[`4`], c3, c1, c2);
373	r[`11`] = c3;
374	c3 = `0`;
375	mul_add_c(a[`7`], b[`5`], c1, c2, c3);
376	mul_add_c(a[`6`], b[`6`], c1, c2, c3);
377	mul_add_c(a[`5`], b[`7`], c1, c2, c3);
378	r[`12`] = c1;
379	c1 = `0`;
380	mul_add_c(a[`6`], b[`7`], c2, c3, c1);
381	mul_add_c(a[`7`], b[`6`], c2, c3, c1);
382	r[`13`] = c2;
383	c2 = `0`;
384	mul_add_c(a[`7`], b[`7`], c3, c1, c2);
385	r[`14`] = c3;
386	r[`15`] = c1;
387	}
388
389	void bn_mul_comba4(BN_ULONG r[`8`], const BN_ULONG a[`4`], const BN_ULONG b[`4`]) {
390	BN_ULONG c1, c2, c3;
391
392	c1 = `0`;
393	c2 = `0`;
394	c3 = `0`;
395	mul_add_c(a[`0`], b[`0`], c1, c2, c3);
396	r[`0`] = c1;
397	c1 = `0`;
398	mul_add_c(a[`0`], b[`1`], c2, c3, c1);
399	mul_add_c(a[`1`], b[`0`], c2, c3, c1);
400	r[`1`] = c2;
401	c2 = `0`;
402	mul_add_c(a[`2`], b[`0`], c3, c1, c2);
403	mul_add_c(a[`1`], b[`1`], c3, c1, c2);
404	mul_add_c(a[`0`], b[`2`], c3, c1, c2);
405	r[`2`] = c3;
406	c3 = `0`;
407	mul_add_c(a[`0`], b[`3`], c1, c2, c3);
408	mul_add_c(a[`1`], b[`2`], c1, c2, c3);
409	mul_add_c(a[`2`], b[`1`], c1, c2, c3);
410	mul_add_c(a[`3`], b[`0`], c1, c2, c3);
411	r[`3`] = c1;
412	c1 = `0`;
413	mul_add_c(a[`3`], b[`1`], c2, c3, c1);
414	mul_add_c(a[`2`], b[`2`], c2, c3, c1);
415	mul_add_c(a[`1`], b[`3`], c2, c3, c1);
416	r[`4`] = c2;
417	c2 = `0`;
418	mul_add_c(a[`2`], b[`3`], c3, c1, c2);
419	mul_add_c(a[`3`], b[`2`], c3, c1, c2);
420	r[`5`] = c3;
421	c3 = `0`;
422	mul_add_c(a[`3`], b[`3`], c1, c2, c3);
423	r[`6`] = c1;
424	r[`7`] = c2;
425	}
426
427	void bn_sqr_comba8(BN_ULONG r[`16`], const BN_ULONG a[`8`]) {
428	BN_ULONG c1, c2, c3;
429
430	c1 = `0`;
431	c2 = `0`;
432	c3 = `0`;
433	sqr_add_c(a, `0`, c1, c2, c3);
434	r[`0`] = c1;
435	c1 = `0`;
436	sqr_add_c2(a, `1`, `0`, c2, c3, c1);
437	r[`1`] = c2;
438	c2 = `0`;
439	sqr_add_c(a, `1`, c3, c1, c2);
440	sqr_add_c2(a, `2`, `0`, c3, c1, c2);
441	r[`2`] = c3;
442	c3 = `0`;
443	sqr_add_c2(a, `3`, `0`, c1, c2, c3);
444	sqr_add_c2(a, `2`, `1`, c1, c2, c3);
445	r[`3`] = c1;
446	c1 = `0`;
447	sqr_add_c(a, `2`, c2, c3, c1);
448	sqr_add_c2(a, `3`, `1`, c2, c3, c1);
449	sqr_add_c2(a, `4`, `0`, c2, c3, c1);
450	r[`4`] = c2;
451	c2 = `0`;
452	sqr_add_c2(a, `5`, `0`, c3, c1, c2);
453	sqr_add_c2(a, `4`, `1`, c3, c1, c2);
454	sqr_add_c2(a, `3`, `2`, c3, c1, c2);
455	r[`5`] = c3;
456	c3 = `0`;
457	sqr_add_c(a, `3`, c1, c2, c3);
458	sqr_add_c2(a, `4`, `2`, c1, c2, c3);
459	sqr_add_c2(a, `5`, `1`, c1, c2, c3);
460	sqr_add_c2(a, `6`, `0`, c1, c2, c3);
461	r[`6`] = c1;
462	c1 = `0`;
463	sqr_add_c2(a, `7`, `0`, c2, c3, c1);
464	sqr_add_c2(a, `6`, `1`, c2, c3, c1);
465	sqr_add_c2(a, `5`, `2`, c2, c3, c1);
466	sqr_add_c2(a, `4`, `3`, c2, c3, c1);
467	r[`7`] = c2;
468	c2 = `0`;
469	sqr_add_c(a, `4`, c3, c1, c2);
470	sqr_add_c2(a, `5`, `3`, c3, c1, c2);
471	sqr_add_c2(a, `6`, `2`, c3, c1, c2);
472	sqr_add_c2(a, `7`, `1`, c3, c1, c2);
473	r[`8`] = c3;
474	c3 = `0`;
475	sqr_add_c2(a, `7`, `2`, c1, c2, c3);
476	sqr_add_c2(a, `6`, `3`, c1, c2, c3);
477	sqr_add_c2(a, `5`, `4`, c1, c2, c3);
478	r[`9`] = c1;
479	c1 = `0`;
480	sqr_add_c(a, `5`, c2, c3, c1);
481	sqr_add_c2(a, `6`, `4`, c2, c3, c1);
482	sqr_add_c2(a, `7`, `3`, c2, c3, c1);
483	r[`10`] = c2;
484	c2 = `0`;
485	sqr_add_c2(a, `7`, `4`, c3, c1, c2);
486	sqr_add_c2(a, `6`, `5`, c3, c1, c2);
487	r[`11`] = c3;
488	c3 = `0`;
489	sqr_add_c(a, `6`, c1, c2, c3);
490	sqr_add_c2(a, `7`, `5`, c1, c2, c3);
491	r[`12`] = c1;
492	c1 = `0`;
493	sqr_add_c2(a, `7`, `6`, c2, c3, c1);
494	r[`13`] = c2;
495	c2 = `0`;
496	sqr_add_c(a, `7`, c3, c1, c2);
497	r[`14`] = c3;
498	r[`15`] = c1;
499	}
500
501	void bn_sqr_comba4(BN_ULONG r[`8`], const BN_ULONG a[`4`]) {
502	BN_ULONG c1, c2, c3;
503
504	c1 = `0`;
505	c2 = `0`;
506	c3 = `0`;
507	sqr_add_c(a, `0`, c1, c2, c3);
508	r[`0`] = c1;
509	c1 = `0`;
510	sqr_add_c2(a, `1`, `0`, c2, c3, c1);
511	r[`1`] = c2;
512	c2 = `0`;
513	sqr_add_c(a, `1`, c3, c1, c2);
514	sqr_add_c2(a, `2`, `0`, c3, c1, c2);
515	r[`2`] = c3;
516	c3 = `0`;
517	sqr_add_c2(a, `3`, `0`, c1, c2, c3);
518	sqr_add_c2(a, `2`, `1`, c1, c2, c3);
519	r[`3`] = c1;
520	c1 = `0`;
521	sqr_add_c(a, `2`, c2, c3, c1);
522	sqr_add_c2(a, `3`, `1`, c2, c3, c1);
523	r[`4`] = c2;
524	c2 = `0`;
525	sqr_add_c2(a, `3`, `2`, c3, c1, c2);
526	r[`5`] = c3;
527	c3 = `0`;
528	sqr_add_c(a, `3`, c1, c2, c3);
529	r[`6`] = c1;
530	r[`7`] = c2;
531	}
532
// Drop the helper macros so their short, generic names do not remain defined
// past this point.
#undef mul_add
#undef mul
#undef sqr
#undef mul_add_c
#undef sqr_add_c
#undef mul_add_c2
#undef sqr_add_c2
540
#endif  // !NO_ASM && X86_64 && (__GNUC__ || __clang__)
542

Browse the source code of engine/third_party/boringssl/src/crypto/fipsmodule/bn/asm/x86_64-gcc.c