# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

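# When building with MemorySanitizer, force the C fallbacks: MSan cannot
# see initialization performed inside hand-written assembly.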
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

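# Constant pool for the NIST P-256 routines:
#   .Lpoly     - the field prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1,
#                least-significant 64-bit limb first
#   .LOne/.LTwo/.LThree - dword broadcasts used by the SIMD table lookups
#   .LONE_mont - 1 in Montgomery form, i.e. 2^256 mod p
#   .Lord      - the group order n
#   .LordK     - -n^-1 mod 2^64, the Montgomery constant for reduction mod n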
.align 64
.Lpoly:
.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

.LOne:
.long 1,1,1,1,1,1,1,1
.LTwo:
.long 2,2,2,2,2,2,2,2
.LThree:
.long 3,3,3,3,3,3,3,3
.LONE_mont:
.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe


.Lord:
.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
.LordK:
.quad 0xccd1c8aaee00bc4f



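# void ecp_nistz256_neg(uint64_t res[4], uint64_t a[4]);
# res = -a mod p: subtracts a from zero, adds p back, and uses cmov to keep
# the pre-addition value when a was zero, so the flow is branch-free.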
.globl ecp_nistz256_neg
.hidden ecp_nistz256_neg
.type ecp_nistz256_neg,@function
.align 32
ecp_nistz256_neg:
.cfi_startproc
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-16
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-24
.Lneg_body:

	xorq %r8,%r8
	xorq %r9,%r9
	xorq %r10,%r10
	xorq %r11,%r11
	xorq %r13,%r13

	subq 0(%rsi),%r8
	sbbq 8(%rsi),%r9
	sbbq 16(%rsi),%r10
	movq %r8,%rax
	sbbq 24(%rsi),%r11
	leaq .Lpoly(%rip),%rsi
	movq %r9,%rdx
	sbbq $0,%r13

	addq 0(%rsi),%r8
	movq %r10,%rcx
	adcq 8(%rsi),%r9
	adcq 16(%rsi),%r10
	movq %r11,%r12
	adcq 24(%rsi),%r11
	testq %r13,%r13

	cmovzq %rax,%r8
	cmovzq %rdx,%r9
	movq %r8,0(%rdi)
	cmovzq %rcx,%r10
	movq %r9,8(%rdi)
	cmovzq %r12,%r11
	movq %r10,16(%rdi)
	movq %r11,24(%rdi)

	movq 0(%rsp),%r13
.cfi_restore %r13
	movq 8(%rsp),%r12
.cfi_restore %r12
	leaq 16(%rsp),%rsp
.cfi_adjust_cfa_offset -16
.Lneg_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_neg,.-ecp_nistz256_neg






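# void ecp_nistz256_ord_mul_mont(uint64_t res[4], uint64_t a[4], uint64_t b[4]);
# Montgomery multiplication modulo the group order: res = a*b*2^-256 mod n.
# The 0x80100 mask below tests the BMI2 and ADX capability bits; when both
# are set, the MULX/ADCX/ADOX variant is used instead.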
.globl ecp_nistz256_ord_mul_mont
.hidden ecp_nistz256_ord_mul_mont
.type ecp_nistz256_ord_mul_mont,@function
.align 32
ecp_nistz256_ord_mul_mont:
.cfi_startproc
	leaq OPENSSL_ia32cap_P(%rip),%rcx
	movq 8(%rcx),%rcx
	andl $0x80100,%ecx
	cmpl $0x80100,%ecx
	je .Lecp_nistz256_ord_mul_montx
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lord_mul_body:

	movq 0(%rdx),%rax
	movq %rdx,%rbx
	leaq .Lord(%rip),%r14
	movq .LordK(%rip),%r15


	movq %rax,%rcx
	mulq 0(%rsi)
	movq %rax,%r8
	movq %rcx,%rax
	movq %rdx,%r9

	mulq 8(%rsi)
	addq %rax,%r9
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq 16(%rsi)
	addq %rax,%r10
	movq %rcx,%rax
	adcq $0,%rdx

	movq %r8,%r13
	imulq %r15,%r8

	movq %rdx,%r11
	mulq 24(%rsi)
	addq %rax,%r11
	movq %r8,%rax
	adcq $0,%rdx
	movq %rdx,%r12


	mulq 0(%r14)
	movq %r8,%rbp
	addq %rax,%r13
	movq %r8,%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	subq %r8,%r10
	sbbq $0,%r8

	mulq 8(%r14)
	addq %rcx,%r9
	adcq $0,%rdx
	addq %rax,%r9
	movq %rbp,%rax
	adcq %rdx,%r10
	movq %rbp,%rdx
	adcq $0,%r8

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r11
	movq 8(%rbx),%rax
	sbbq %rdx,%rbp

	addq %r8,%r11
	adcq %rbp,%r12
	adcq $0,%r13


	movq %rax,%rcx
	mulq 0(%rsi)
	addq %rax,%r9
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 8(%rsi)
	addq %rbp,%r10
	adcq $0,%rdx
	addq %rax,%r10
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 16(%rsi)
	addq %rbp,%r11
	adcq $0,%rdx
	addq %rax,%r11
	movq %rcx,%rax
	adcq $0,%rdx

	movq %r9,%rcx
	imulq %r15,%r9

	movq %rdx,%rbp
	mulq 24(%rsi)
	addq %rbp,%r12
	adcq $0,%rdx
	xorq %r8,%r8
	addq %rax,%r12
	movq %r9,%rax
	adcq %rdx,%r13
	adcq $0,%r8


	mulq 0(%r14)
	movq %r9,%rbp
	addq %rax,%rcx
	movq %r9,%rax
	adcq %rdx,%rcx

	subq %r9,%r11
	sbbq $0,%r9

	mulq 8(%r14)
	addq %rcx,%r10
	adcq $0,%rdx
	addq %rax,%r10
	movq %rbp,%rax
	adcq %rdx,%r11
	movq %rbp,%rdx
	adcq $0,%r9

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r12
	movq 16(%rbx),%rax
	sbbq %rdx,%rbp

	addq %r9,%r12
	adcq %rbp,%r13
	adcq $0,%r8


	movq %rax,%rcx
	mulq 0(%rsi)
	addq %rax,%r10
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 8(%rsi)
	addq %rbp,%r11
	adcq $0,%rdx
	addq %rax,%r11
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 16(%rsi)
	addq %rbp,%r12
	adcq $0,%rdx
	addq %rax,%r12
	movq %rcx,%rax
	adcq $0,%rdx

	movq %r10,%rcx
	imulq %r15,%r10

	movq %rdx,%rbp
	mulq 24(%rsi)
	addq %rbp,%r13
	adcq $0,%rdx
	xorq %r9,%r9
	addq %rax,%r13
	movq %r10,%rax
	adcq %rdx,%r8
	adcq $0,%r9


	mulq 0(%r14)
	movq %r10,%rbp
	addq %rax,%rcx
	movq %r10,%rax
	adcq %rdx,%rcx

	subq %r10,%r12
	sbbq $0,%r10

	mulq 8(%r14)
	addq %rcx,%r11
	adcq $0,%rdx
	addq %rax,%r11
	movq %rbp,%rax
	adcq %rdx,%r12
	movq %rbp,%rdx
	adcq $0,%r10

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r13
	movq 24(%rbx),%rax
	sbbq %rdx,%rbp

	addq %r10,%r13
	adcq %rbp,%r8
	adcq $0,%r9


	movq %rax,%rcx
	mulq 0(%rsi)
	addq %rax,%r11
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 8(%rsi)
	addq %rbp,%r12
	adcq $0,%rdx
	addq %rax,%r12
	movq %rcx,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq 16(%rsi)
	addq %rbp,%r13
	adcq $0,%rdx
	addq %rax,%r13
	movq %rcx,%rax
	adcq $0,%rdx

	movq %r11,%rcx
	imulq %r15,%r11

	movq %rdx,%rbp
	mulq 24(%rsi)
	addq %rbp,%r8
	adcq $0,%rdx
	xorq %r10,%r10
	addq %rax,%r8
	movq %r11,%rax
	adcq %rdx,%r9
	adcq $0,%r10


	mulq 0(%r14)
	movq %r11,%rbp
	addq %rax,%rcx
	movq %r11,%rax
	adcq %rdx,%rcx

	subq %r11,%r13
	sbbq $0,%r11

	mulq 8(%r14)
	addq %rcx,%r12
	adcq $0,%rdx
	addq %rax,%r12
	movq %rbp,%rax
	adcq %rdx,%r13
	movq %rbp,%rdx
	adcq $0,%r11

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r8
	sbbq %rdx,%rbp

	addq %r11,%r8
	adcq %rbp,%r9
	adcq $0,%r10


	movq %r12,%rsi
	subq 0(%r14),%r12
	movq %r13,%r11
	sbbq 8(%r14),%r13
	movq %r8,%rcx
	sbbq 16(%r14),%r8
	movq %r9,%rbp
	sbbq 24(%r14),%r9
	sbbq $0,%r10

	cmovcq %rsi,%r12
	cmovcq %r11,%r13
	cmovcq %rcx,%r8
	cmovcq %rbp,%r9

	movq %r12,0(%rdi)
	movq %r13,8(%rdi)
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	movq 0(%rsp),%r15
.cfi_restore %r15
	movq 8(%rsp),%r14
.cfi_restore %r14
	movq 16(%rsp),%r13
.cfi_restore %r13
	movq 24(%rsp),%r12
.cfi_restore %r12
	movq 32(%rsp),%rbx
.cfi_restore %rbx
	movq 40(%rsp),%rbp
.cfi_restore %rbp
	leaq 48(%rsp),%rsp
.cfi_adjust_cfa_offset -48
.Lord_mul_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont







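# void ecp_nistz256_ord_sqr_mont(uint64_t res[4], uint64_t a[4], uint64_t rep);
# Squares a modulo the group order rep times, staying in the Montgomery
# domain throughout; rep is kept in %rbx as the loop counter.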
.globl ecp_nistz256_ord_sqr_mont
.hidden ecp_nistz256_ord_sqr_mont
.type ecp_nistz256_ord_sqr_mont,@function
.align 32
ecp_nistz256_ord_sqr_mont:
.cfi_startproc
	leaq OPENSSL_ia32cap_P(%rip),%rcx
	movq 8(%rcx),%rcx
	andl $0x80100,%ecx
	cmpl $0x80100,%ecx
	je .Lecp_nistz256_ord_sqr_montx
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lord_sqr_body:

	movq 0(%rsi),%r8
	movq 8(%rsi),%rax
	movq 16(%rsi),%r14
	movq 24(%rsi),%r15
	leaq .Lord(%rip),%rsi
	movq %rdx,%rbx
	jmp .Loop_ord_sqr

.align 32
.Loop_ord_sqr:

	movq %rax,%rbp
	mulq %r8
	movq %rax,%r9
.byte 102,72,15,110,205
	movq %r14,%rax
	movq %rdx,%r10

	mulq %r8
	addq %rax,%r10
	movq %r15,%rax
.byte 102,73,15,110,214
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %r8
	addq %rax,%r11
	movq %r15,%rax
.byte 102,73,15,110,223
	adcq $0,%rdx
	movq %rdx,%r12


	mulq %r14
	movq %rax,%r13
	movq %r14,%rax
	movq %rdx,%r14


	mulq %rbp
	addq %rax,%r11
	movq %r15,%rax
	adcq $0,%rdx
	movq %rdx,%r15

	mulq %rbp
	addq %rax,%r12
	adcq $0,%rdx

	addq %r15,%r12
	adcq %rdx,%r13
	adcq $0,%r14


	xorq %r15,%r15
	movq %r8,%rax
	addq %r9,%r9
	adcq %r10,%r10
	adcq %r11,%r11
	adcq %r12,%r12
	adcq %r13,%r13
	adcq %r14,%r14
	adcq $0,%r15


	mulq %rax
	movq %rax,%r8
.byte 102,72,15,126,200
	movq %rdx,%rbp

	mulq %rax
	addq %rbp,%r9
	adcq %rax,%r10
.byte 102,72,15,126,208
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq %rax
	addq %rbp,%r11
	adcq %rax,%r12
.byte 102,72,15,126,216
	adcq $0,%rdx
	movq %rdx,%rbp

	movq %r8,%rcx
	imulq 32(%rsi),%r8

	mulq %rax
	addq %rbp,%r13
	adcq %rax,%r14
	movq 0(%rsi),%rax
	adcq %rdx,%r15


	mulq %r8
	movq %r8,%rbp
	addq %rax,%rcx
	movq 8(%rsi),%rax
	adcq %rdx,%rcx

	subq %r8,%r10
	sbbq $0,%rbp

	mulq %r8
	addq %rcx,%r9
	adcq $0,%rdx
	addq %rax,%r9
	movq %r8,%rax
	adcq %rdx,%r10
	movq %r8,%rdx
	adcq $0,%rbp

	movq %r9,%rcx
	imulq 32(%rsi),%r9

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r11
	movq 0(%rsi),%rax
	sbbq %rdx,%r8

	addq %rbp,%r11
	adcq $0,%r8


	mulq %r9
	movq %r9,%rbp
	addq %rax,%rcx
	movq 8(%rsi),%rax
	adcq %rdx,%rcx

	subq %r9,%r11
	sbbq $0,%rbp

	mulq %r9
	addq %rcx,%r10
	adcq $0,%rdx
	addq %rax,%r10
	movq %r9,%rax
	adcq %rdx,%r11
	movq %r9,%rdx
	adcq $0,%rbp

	movq %r10,%rcx
	imulq 32(%rsi),%r10

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r8
	movq 0(%rsi),%rax
	sbbq %rdx,%r9

	addq %rbp,%r8
	adcq $0,%r9


	mulq %r10
	movq %r10,%rbp
	addq %rax,%rcx
	movq 8(%rsi),%rax
	adcq %rdx,%rcx

	subq %r10,%r8
	sbbq $0,%rbp

	mulq %r10
	addq %rcx,%r11
	adcq $0,%rdx
	addq %rax,%r11
	movq %r10,%rax
	adcq %rdx,%r8
	movq %r10,%rdx
	adcq $0,%rbp

	movq %r11,%rcx
	imulq 32(%rsi),%r11

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r9
	movq 0(%rsi),%rax
	sbbq %rdx,%r10

	addq %rbp,%r9
	adcq $0,%r10


	mulq %r11
	movq %r11,%rbp
	addq %rax,%rcx
	movq 8(%rsi),%rax
	adcq %rdx,%rcx

	subq %r11,%r9
	sbbq $0,%rbp

	mulq %r11
	addq %rcx,%r8
	adcq $0,%rdx
	addq %rax,%r8
	movq %r11,%rax
	adcq %rdx,%r9
	movq %r11,%rdx
	adcq $0,%rbp

	shlq $32,%rax
	shrq $32,%rdx
	subq %rax,%r10
	sbbq %rdx,%r11

	addq %rbp,%r10
	adcq $0,%r11


	xorq %rdx,%rdx
	addq %r12,%r8
	adcq %r13,%r9
	movq %r8,%r12
	adcq %r14,%r10
	adcq %r15,%r11
	movq %r9,%rax
	adcq $0,%rdx


	subq 0(%rsi),%r8
	movq %r10,%r14
	sbbq 8(%rsi),%r9
	sbbq 16(%rsi),%r10
	movq %r11,%r15
	sbbq 24(%rsi),%r11
	sbbq $0,%rdx

	cmovcq %r12,%r8
	cmovncq %r9,%rax
	cmovncq %r10,%r14
	cmovncq %r11,%r15

	decq %rbx
	jnz .Loop_ord_sqr

	movq %r8,0(%rdi)
	movq %rax,8(%rdi)
	pxor %xmm1,%xmm1
	movq %r14,16(%rdi)
	pxor %xmm2,%xmm2
	movq %r15,24(%rdi)
	pxor %xmm3,%xmm3

	movq 0(%rsp),%r15
.cfi_restore %r15
	movq 8(%rsp),%r14
.cfi_restore %r14
	movq 16(%rsp),%r13
.cfi_restore %r13
	movq 24(%rsp),%r12
.cfi_restore %r12
	movq 32(%rsp),%rbx
.cfi_restore %rbx
	movq 40(%rsp),%rbp
.cfi_restore %rbp
	leaq 48(%rsp),%rsp
.cfi_adjust_cfa_offset -48
.Lord_sqr_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont

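# MULX/ADX flavour of ecp_nistz256_ord_mul_mont. The input pointer and the
# .Lord table base are biased by -128 and compensated with +128-based
# displacements in the loads below.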
.type ecp_nistz256_ord_mul_montx,@function
.align 32
ecp_nistz256_ord_mul_montx:
.cfi_startproc
.Lecp_nistz256_ord_mul_montx:
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lord_mulx_body:

	movq %rdx,%rbx
	movq 0(%rdx),%rdx
	movq 0(%rsi),%r9
	movq 8(%rsi),%r10
	movq 16(%rsi),%r11
	movq 24(%rsi),%r12
	leaq -128(%rsi),%rsi
	leaq .Lord-128(%rip),%r14
	movq .LordK(%rip),%r15


	mulxq %r9,%r8,%r9
	mulxq %r10,%rcx,%r10
	mulxq %r11,%rbp,%r11
	addq %rcx,%r9
	mulxq %r12,%rcx,%r12
	movq %r8,%rdx
	mulxq %r15,%rdx,%rax
	adcq %rbp,%r10
	adcq %rcx,%r11
	adcq $0,%r12


	xorq %r13,%r13
	mulxq 0+128(%r14),%rcx,%rbp
	adcxq %rcx,%r8
	adoxq %rbp,%r9

	mulxq 8+128(%r14),%rcx,%rbp
	adcxq %rcx,%r9
	adoxq %rbp,%r10

	mulxq 16+128(%r14),%rcx,%rbp
	adcxq %rcx,%r10
	adoxq %rbp,%r11

	mulxq 24+128(%r14),%rcx,%rbp
	movq 8(%rbx),%rdx
	adcxq %rcx,%r11
	adoxq %rbp,%r12
	adcxq %r8,%r12
	adoxq %r8,%r13
	adcq $0,%r13


	mulxq 0+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r9
	adoxq %rbp,%r10

	mulxq 8+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r10
	adoxq %rbp,%r11

	mulxq 16+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r12

	mulxq 24+128(%rsi),%rcx,%rbp
	movq %r9,%rdx
	mulxq %r15,%rdx,%rax
	adcxq %rcx,%r12
	adoxq %rbp,%r13

	adcxq %r8,%r13
	adoxq %r8,%r8
	adcq $0,%r8


	mulxq 0+128(%r14),%rcx,%rbp
	adcxq %rcx,%r9
	adoxq %rbp,%r10

	mulxq 8+128(%r14),%rcx,%rbp
	adcxq %rcx,%r10
	adoxq %rbp,%r11

	mulxq 16+128(%r14),%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r12

	mulxq 24+128(%r14),%rcx,%rbp
	movq 16(%rbx),%rdx
	adcxq %rcx,%r12
	adoxq %rbp,%r13
	adcxq %r9,%r13
	adoxq %r9,%r8
	adcq $0,%r8


	mulxq 0+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r10
	adoxq %rbp,%r11

	mulxq 8+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r12

	mulxq 16+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r12
	adoxq %rbp,%r13

	mulxq 24+128(%rsi),%rcx,%rbp
	movq %r10,%rdx
	mulxq %r15,%rdx,%rax
	adcxq %rcx,%r13
	adoxq %rbp,%r8

	adcxq %r9,%r8
	adoxq %r9,%r9
	adcq $0,%r9


	mulxq 0+128(%r14),%rcx,%rbp
	adcxq %rcx,%r10
	adoxq %rbp,%r11

	mulxq 8+128(%r14),%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r12

	mulxq 16+128(%r14),%rcx,%rbp
	adcxq %rcx,%r12
	adoxq %rbp,%r13

	mulxq 24+128(%r14),%rcx,%rbp
	movq 24(%rbx),%rdx
	adcxq %rcx,%r13
	adoxq %rbp,%r8
	adcxq %r10,%r8
	adoxq %r10,%r9
	adcq $0,%r9


	mulxq 0+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r12

	mulxq 8+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r12
	adoxq %rbp,%r13

	mulxq 16+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r13
	adoxq %rbp,%r8

	mulxq 24+128(%rsi),%rcx,%rbp
	movq %r11,%rdx
	mulxq %r15,%rdx,%rax
	adcxq %rcx,%r8
	adoxq %rbp,%r9

	adcxq %r10,%r9
	adoxq %r10,%r10
	adcq $0,%r10


	mulxq 0+128(%r14),%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r12

	mulxq 8+128(%r14),%rcx,%rbp
	adcxq %rcx,%r12
	adoxq %rbp,%r13

	mulxq 16+128(%r14),%rcx,%rbp
	adcxq %rcx,%r13
	adoxq %rbp,%r8

	mulxq 24+128(%r14),%rcx,%rbp
	leaq 128(%r14),%r14
	movq %r12,%rbx
	adcxq %rcx,%r8
	adoxq %rbp,%r9
	movq %r13,%rdx
	adcxq %r11,%r9
	adoxq %r11,%r10
	adcq $0,%r10



	movq %r8,%rcx
	subq 0(%r14),%r12
	sbbq 8(%r14),%r13
	sbbq 16(%r14),%r8
	movq %r9,%rbp
	sbbq 24(%r14),%r9
	sbbq $0,%r10

	cmovcq %rbx,%r12
	cmovcq %rdx,%r13
	cmovcq %rcx,%r8
	cmovcq %rbp,%r9

	movq %r12,0(%rdi)
	movq %r13,8(%rdi)
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	movq 0(%rsp),%r15
.cfi_restore %r15
	movq 8(%rsp),%r14
.cfi_restore %r14
	movq 16(%rsp),%r13
.cfi_restore %r13
	movq 24(%rsp),%r12
.cfi_restore %r12
	movq 32(%rsp),%rbx
.cfi_restore %rbx
	movq 40(%rsp),%rbp
.cfi_restore %rbp
	leaq 48(%rsp),%rsp
.cfi_adjust_cfa_offset -48
.Lord_mulx_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx

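# MULX/ADX flavour of ecp_nistz256_ord_sqr_mont.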
.type ecp_nistz256_ord_sqr_montx,@function
.align 32
ecp_nistz256_ord_sqr_montx:
.cfi_startproc
.Lecp_nistz256_ord_sqr_montx:
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lord_sqrx_body:

	movq %rdx,%rbx
	movq 0(%rsi),%rdx
	movq 8(%rsi),%r14
	movq 16(%rsi),%r15
	movq 24(%rsi),%r8
	leaq .Lord(%rip),%rsi
	jmp .Loop_ord_sqrx

.align 32
.Loop_ord_sqrx:
	mulxq %r14,%r9,%r10
	mulxq %r15,%rcx,%r11
	movq %rdx,%rax
.byte 102,73,15,110,206
	mulxq %r8,%rbp,%r12
	movq %r14,%rdx
	addq %rcx,%r10
.byte 102,73,15,110,215
	adcq %rbp,%r11
	adcq $0,%r12
	xorq %r13,%r13

	mulxq %r15,%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r12

	mulxq %r8,%rcx,%rbp
	movq %r15,%rdx
	adcxq %rcx,%r12
	adoxq %rbp,%r13
	adcq $0,%r13

	mulxq %r8,%rcx,%r14
	movq %rax,%rdx
.byte 102,73,15,110,216
	xorq %r15,%r15
	adcxq %r9,%r9
	adoxq %rcx,%r13
	adcxq %r10,%r10
	adoxq %r15,%r14


	mulxq %rdx,%r8,%rbp
.byte 102,72,15,126,202
	adcxq %r11,%r11
	adoxq %rbp,%r9
	adcxq %r12,%r12
	mulxq %rdx,%rcx,%rax
.byte 102,72,15,126,210
	adcxq %r13,%r13
	adoxq %rcx,%r10
	adcxq %r14,%r14
	mulxq %rdx,%rcx,%rbp
.byte 0x67
.byte 102,72,15,126,218
	adoxq %rax,%r11
	adcxq %r15,%r15
	adoxq %rcx,%r12
	adoxq %rbp,%r13
	mulxq %rdx,%rcx,%rax
	adoxq %rcx,%r14
	adoxq %rax,%r15


	movq %r8,%rdx
	mulxq 32(%rsi),%rdx,%rcx

	xorq %rax,%rax
	mulxq 0(%rsi),%rcx,%rbp
	adcxq %rcx,%r8
	adoxq %rbp,%r9
	mulxq 8(%rsi),%rcx,%rbp
	adcxq %rcx,%r9
	adoxq %rbp,%r10
	mulxq 16(%rsi),%rcx,%rbp
	adcxq %rcx,%r10
	adoxq %rbp,%r11
	mulxq 24(%rsi),%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r8
	adcxq %rax,%r8


	movq %r9,%rdx
	mulxq 32(%rsi),%rdx,%rcx

	mulxq 0(%rsi),%rcx,%rbp
	adoxq %rcx,%r9
	adcxq %rbp,%r10
	mulxq 8(%rsi),%rcx,%rbp
	adoxq %rcx,%r10
	adcxq %rbp,%r11
	mulxq 16(%rsi),%rcx,%rbp
	adoxq %rcx,%r11
	adcxq %rbp,%r8
	mulxq 24(%rsi),%rcx,%rbp
	adoxq %rcx,%r8
	adcxq %rbp,%r9
	adoxq %rax,%r9


	movq %r10,%rdx
	mulxq 32(%rsi),%rdx,%rcx

	mulxq 0(%rsi),%rcx,%rbp
	adcxq %rcx,%r10
	adoxq %rbp,%r11
	mulxq 8(%rsi),%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r8
	mulxq 16(%rsi),%rcx,%rbp
	adcxq %rcx,%r8
	adoxq %rbp,%r9
	mulxq 24(%rsi),%rcx,%rbp
	adcxq %rcx,%r9
	adoxq %rbp,%r10
	adcxq %rax,%r10


	movq %r11,%rdx
	mulxq 32(%rsi),%rdx,%rcx

	mulxq 0(%rsi),%rcx,%rbp
	adoxq %rcx,%r11
	adcxq %rbp,%r8
	mulxq 8(%rsi),%rcx,%rbp
	adoxq %rcx,%r8
	adcxq %rbp,%r9
	mulxq 16(%rsi),%rcx,%rbp
	adoxq %rcx,%r9
	adcxq %rbp,%r10
	mulxq 24(%rsi),%rcx,%rbp
	adoxq %rcx,%r10
	adcxq %rbp,%r11
	adoxq %rax,%r11


	addq %r8,%r12
	adcq %r13,%r9
	movq %r12,%rdx
	adcq %r14,%r10
	adcq %r15,%r11
	movq %r9,%r14
	adcq $0,%rax


	subq 0(%rsi),%r12
	movq %r10,%r15
	sbbq 8(%rsi),%r9
	sbbq 16(%rsi),%r10
	movq %r11,%r8
	sbbq 24(%rsi),%r11
	sbbq $0,%rax

	cmovncq %r12,%rdx
	cmovncq %r9,%r14
	cmovncq %r10,%r15
	cmovncq %r11,%r8

	decq %rbx
	jnz .Loop_ord_sqrx

	movq %rdx,0(%rdi)
	movq %r14,8(%rdi)
	pxor %xmm1,%xmm1
	movq %r15,16(%rdi)
	pxor %xmm2,%xmm2
	movq %r8,24(%rdi)
	pxor %xmm3,%xmm3

	movq 0(%rsp),%r15
.cfi_restore %r15
	movq 8(%rsp),%r14
.cfi_restore %r14
	movq 16(%rsp),%r13
.cfi_restore %r13
	movq 24(%rsp),%r12
.cfi_restore %r12
	movq 32(%rsp),%rbx
.cfi_restore %rbx
	movq 40(%rsp),%rbp
.cfi_restore %rbp
	leaq 48(%rsp),%rsp
.cfi_adjust_cfa_offset -48
.Lord_sqrx_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx






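# void ecp_nistz256_mul_mont(uint64_t res[4], uint64_t a[4], uint64_t b[4]);
# Montgomery multiplication in the field: res = a*b*2^-256 mod p, dispatching
# on the BMI2/ADX capability bits cached in %ecx.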
.globl ecp_nistz256_mul_mont
.hidden ecp_nistz256_mul_mont
.type ecp_nistz256_mul_mont,@function
.align 32
ecp_nistz256_mul_mont:
.cfi_startproc
	leaq OPENSSL_ia32cap_P(%rip),%rcx
	movq 8(%rcx),%rcx
	andl $0x80100,%ecx
.Lmul_mont:
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lmul_body:
	cmpl $0x80100,%ecx
	je .Lmul_montx
	movq %rdx,%rbx
	movq 0(%rdx),%rax
	movq 0(%rsi),%r9
	movq 8(%rsi),%r10
	movq 16(%rsi),%r11
	movq 24(%rsi),%r12

	call __ecp_nistz256_mul_montq
	jmp .Lmul_mont_done

.align 32
.Lmul_montx:
	movq %rdx,%rbx
	movq 0(%rdx),%rdx
	movq 0(%rsi),%r9
	movq 8(%rsi),%r10
	movq 16(%rsi),%r11
	movq 24(%rsi),%r12
	leaq -128(%rsi),%rsi

	call __ecp_nistz256_mul_montx
.Lmul_mont_done:
	movq 0(%rsp),%r15
.cfi_restore %r15
	movq 8(%rsp),%r14
.cfi_restore %r14
	movq 16(%rsp),%r13
.cfi_restore %r13
	movq 24(%rsp),%r12
.cfi_restore %r12
	movq 32(%rsp),%rbx
.cfi_restore %rbx
	movq 40(%rsp),%rbp
.cfi_restore %rbp
	leaq 48(%rsp),%rsp
.cfi_adjust_cfa_offset -48
.Lmul_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont

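# Worker for the MULQ-based field multiplication. On entry %rbx points at b,
# %rax holds b[0] and %r9-%r12 hold a[0..3]; the result is stored to (%rdi).
# Reduction exploits the sparse form of p: only .Lpoly+8 and .Lpoly+24 are
# loaded (into %r14/%r15); the other limbs are folded in with shifts.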
.type __ecp_nistz256_mul_montq,@function
.align 32
__ecp_nistz256_mul_montq:
.cfi_startproc


	movq %rax,%rbp
	mulq %r9
	movq .Lpoly+8(%rip),%r14
	movq %rax,%r8
	movq %rbp,%rax
	movq %rdx,%r9

	mulq %r10
	movq .Lpoly+24(%rip),%r15
	addq %rax,%r9
	movq %rbp,%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %r11
	addq %rax,%r10
	movq %rbp,%rax
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %r12
	addq %rax,%r11
	movq %r8,%rax
	adcq $0,%rdx
	xorq %r13,%r13
	movq %rdx,%r12










	movq %r8,%rbp
	shlq $32,%r8
	mulq %r15
	shrq $32,%rbp
	addq %r8,%r9
	adcq %rbp,%r10
	adcq %rax,%r11
	movq 8(%rbx),%rax
	adcq %rdx,%r12
	adcq $0,%r13
	xorq %r8,%r8



	movq %rax,%rbp
	mulq 0(%rsi)
	addq %rax,%r9
	movq %rbp,%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	mulq 8(%rsi)
	addq %rcx,%r10
	adcq $0,%rdx
	addq %rax,%r10
	movq %rbp,%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	mulq 16(%rsi)
	addq %rcx,%r11
	adcq $0,%rdx
	addq %rax,%r11
	movq %rbp,%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	mulq 24(%rsi)
	addq %rcx,%r12
	adcq $0,%rdx
	addq %rax,%r12
	movq %r9,%rax
	adcq %rdx,%r13
	adcq $0,%r8



	movq %r9,%rbp
	shlq $32,%r9
	mulq %r15
	shrq $32,%rbp
	addq %r9,%r10
	adcq %rbp,%r11
	adcq %rax,%r12
	movq 16(%rbx),%rax
	adcq %rdx,%r13
	adcq $0,%r8
	xorq %r9,%r9



	movq %rax,%rbp
	mulq 0(%rsi)
	addq %rax,%r10
	movq %rbp,%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	mulq 8(%rsi)
	addq %rcx,%r11
	adcq $0,%rdx
	addq %rax,%r11
	movq %rbp,%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	mulq 16(%rsi)
	addq %rcx,%r12
	adcq $0,%rdx
	addq %rax,%r12
	movq %rbp,%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	mulq 24(%rsi)
	addq %rcx,%r13
	adcq $0,%rdx
	addq %rax,%r13
	movq %r10,%rax
	adcq %rdx,%r8
	adcq $0,%r9



	movq %r10,%rbp
	shlq $32,%r10
	mulq %r15
	shrq $32,%rbp
	addq %r10,%r11
	adcq %rbp,%r12
	adcq %rax,%r13
	movq 24(%rbx),%rax
	adcq %rdx,%r8
	adcq $0,%r9
	xorq %r10,%r10



	movq %rax,%rbp
	mulq 0(%rsi)
	addq %rax,%r11
	movq %rbp,%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	mulq 8(%rsi)
	addq %rcx,%r12
	adcq $0,%rdx
	addq %rax,%r12
	movq %rbp,%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	mulq 16(%rsi)
	addq %rcx,%r13
	adcq $0,%rdx
	addq %rax,%r13
	movq %rbp,%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	mulq 24(%rsi)
	addq %rcx,%r8
	adcq $0,%rdx
	addq %rax,%r8
	movq %r11,%rax
	adcq %rdx,%r9
	adcq $0,%r10



	movq %r11,%rbp
	shlq $32,%r11
	mulq %r15
	shrq $32,%rbp
	addq %r11,%r12
	adcq %rbp,%r13
	movq %r12,%rcx
	adcq %rax,%r8
	adcq %rdx,%r9
	movq %r13,%rbp
	adcq $0,%r10



	subq $-1,%r12
	movq %r8,%rbx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%rdx
	sbbq %r15,%r9
	sbbq $0,%r10

	cmovcq %rcx,%r12
	cmovcq %rbp,%r13
	movq %r12,0(%rdi)
	cmovcq %rbx,%r8
	movq %r13,8(%rdi)
	cmovcq %rdx,%r9
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq








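# void ecp_nistz256_sqr_mont(uint64_t res[4], uint64_t a[4]);
# Montgomery squaring in the field: res = a*a*2^-256 mod p.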
.globl ecp_nistz256_sqr_mont
.hidden ecp_nistz256_sqr_mont
.type ecp_nistz256_sqr_mont,@function
.align 32
ecp_nistz256_sqr_mont:
.cfi_startproc
	leaq OPENSSL_ia32cap_P(%rip),%rcx
	movq 8(%rcx),%rcx
	andl $0x80100,%ecx
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lsqr_body:
	cmpl $0x80100,%ecx
	je .Lsqr_montx
	movq 0(%rsi),%rax
	movq 8(%rsi),%r14
	movq 16(%rsi),%r15
	movq 24(%rsi),%r8

	call __ecp_nistz256_sqr_montq
	jmp .Lsqr_mont_done

.align 32
.Lsqr_montx:
	movq 0(%rsi),%rdx
	movq 8(%rsi),%r14
	movq 16(%rsi),%r15
	movq 24(%rsi),%r8
	leaq -128(%rsi),%rsi

	call __ecp_nistz256_sqr_montx
.Lsqr_mont_done:
	movq 0(%rsp),%r15
.cfi_restore %r15
	movq 8(%rsp),%r14
.cfi_restore %r14
	movq 16(%rsp),%r13
.cfi_restore %r13
	movq 24(%rsp),%r12
.cfi_restore %r12
	movq 32(%rsp),%rbx
.cfi_restore %rbx
	movq 40(%rsp),%rbp
.cfi_restore %rbp
	leaq 48(%rsp),%rsp
.cfi_adjust_cfa_offset -48
.Lsqr_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont

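# Worker for the MULQ-based field squaring. On entry %rax holds a[0] and
# %r14,%r15,%r8 hold a[1..3]; %rsi still points at a for the reloads of the
# diagonal terms.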
.type __ecp_nistz256_sqr_montq,@function
.align 32
__ecp_nistz256_sqr_montq:
.cfi_startproc
	movq %rax,%r13
	mulq %r14
	movq %rax,%r9
	movq %r15,%rax
	movq %rdx,%r10

	mulq %r13
	addq %rax,%r10
	movq %r8,%rax
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %r13
	addq %rax,%r11
	movq %r15,%rax
	adcq $0,%rdx
	movq %rdx,%r12


	mulq %r14
	addq %rax,%r11
	movq %r8,%rax
	adcq $0,%rdx
	movq %rdx,%rbp

	mulq %r14
	addq %rax,%r12
	movq %r8,%rax
	adcq $0,%rdx
	addq %rbp,%r12
	movq %rdx,%r13
	adcq $0,%r13


	mulq %r15
	xorq %r15,%r15
	addq %rax,%r13
	movq 0(%rsi),%rax
	movq %rdx,%r14
	adcq $0,%r14

	addq %r9,%r9
	adcq %r10,%r10
	adcq %r11,%r11
	adcq %r12,%r12
	adcq %r13,%r13
	adcq %r14,%r14
	adcq $0,%r15

	mulq %rax
	movq %rax,%r8
	movq 8(%rsi),%rax
	movq %rdx,%rcx

	mulq %rax
	addq %rcx,%r9
	adcq %rax,%r10
	movq 16(%rsi),%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	mulq %rax
	addq %rcx,%r11
	adcq %rax,%r12
	movq 24(%rsi),%rax
	adcq $0,%rdx
	movq %rdx,%rcx

	mulq %rax
	addq %rcx,%r13
	adcq %rax,%r14
	movq %r8,%rax
	adcq %rdx,%r15

	movq .Lpoly+8(%rip),%rsi
	movq .Lpoly+24(%rip),%rbp




	movq %r8,%rcx
	shlq $32,%r8
	mulq %rbp
	shrq $32,%rcx
	addq %r8,%r9
	adcq %rcx,%r10
	adcq %rax,%r11
	movq %r9,%rax
	adcq $0,%rdx



	movq %r9,%rcx
	shlq $32,%r9
	movq %rdx,%r8
	mulq %rbp
	shrq $32,%rcx
	addq %r9,%r10
	adcq %rcx,%r11
	adcq %rax,%r8
	movq %r10,%rax
	adcq $0,%rdx



	movq %r10,%rcx
	shlq $32,%r10
	movq %rdx,%r9
	mulq %rbp
	shrq $32,%rcx
	addq %r10,%r11
	adcq %rcx,%r8
	adcq %rax,%r9
	movq %r11,%rax
	adcq $0,%rdx



	movq %r11,%rcx
	shlq $32,%r11
	movq %rdx,%r10
	mulq %rbp
	shrq $32,%rcx
	addq %r11,%r8
	adcq %rcx,%r9
	adcq %rax,%r10
	adcq $0,%rdx
	xorq %r11,%r11



	addq %r8,%r12
	adcq %r9,%r13
	movq %r12,%r8
	adcq %r10,%r14
	adcq %rdx,%r15
	movq %r13,%r9
	adcq $0,%r11

	subq $-1,%r12
	movq %r14,%r10
	sbbq %rsi,%r13
	sbbq $0,%r14
	movq %r15,%rcx
	sbbq %rbp,%r15
	sbbq $0,%r11

	cmovcq %r8,%r12
	cmovcq %r9,%r13
	movq %r12,0(%rdi)
	cmovcq %r10,%r14
	movq %r13,8(%rdi)
	cmovcq %rcx,%r15
	movq %r14,16(%rdi)
	movq %r15,24(%rdi)

	.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
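# MULX/ADX worker for the field multiplication. On entry %rdx holds b[0],
# %rbx points at b, %r9-%r12 hold a[0..3], and %rsi points 128 bytes below a.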
.type __ecp_nistz256_mul_montx,@function
.align 32
__ecp_nistz256_mul_montx:
.cfi_startproc


	mulxq %r9,%r8,%r9
	mulxq %r10,%rcx,%r10
	movq $32,%r14
	xorq %r13,%r13
	mulxq %r11,%rbp,%r11
	movq .Lpoly+24(%rip),%r15
	adcq %rcx,%r9
	mulxq %r12,%rcx,%r12
	movq %r8,%rdx
	adcq %rbp,%r10
	shlxq %r14,%r8,%rbp
	adcq %rcx,%r11
	shrxq %r14,%r8,%rcx
	adcq $0,%r12



	addq %rbp,%r9
	adcq %rcx,%r10

	mulxq %r15,%rcx,%rbp
	movq 8(%rbx),%rdx
	adcq %rcx,%r11
	adcq %rbp,%r12
	adcq $0,%r13
	xorq %r8,%r8



	mulxq 0+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r9
	adoxq %rbp,%r10

	mulxq 8+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r10
	adoxq %rbp,%r11

	mulxq 16+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r12

	mulxq 24+128(%rsi),%rcx,%rbp
	movq %r9,%rdx
	adcxq %rcx,%r12
	shlxq %r14,%r9,%rcx
	adoxq %rbp,%r13
	shrxq %r14,%r9,%rbp

	adcxq %r8,%r13
	adoxq %r8,%r8
	adcq $0,%r8



	addq %rcx,%r10
	adcq %rbp,%r11

	mulxq %r15,%rcx,%rbp
	movq 16(%rbx),%rdx
	adcq %rcx,%r12
	adcq %rbp,%r13
	adcq $0,%r8
	xorq %r9,%r9



	mulxq 0+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r10
	adoxq %rbp,%r11

	mulxq 8+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r12

	mulxq 16+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r12
	adoxq %rbp,%r13

	mulxq 24+128(%rsi),%rcx,%rbp
	movq %r10,%rdx
	adcxq %rcx,%r13
	shlxq %r14,%r10,%rcx
	adoxq %rbp,%r8
	shrxq %r14,%r10,%rbp

	adcxq %r9,%r8
	adoxq %r9,%r9
	adcq $0,%r9



	addq %rcx,%r11
	adcq %rbp,%r12

	mulxq %r15,%rcx,%rbp
	movq 24(%rbx),%rdx
	adcq %rcx,%r13
	adcq %rbp,%r8
	adcq $0,%r9
	xorq %r10,%r10



	mulxq 0+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r12

	mulxq 8+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r12
	adoxq %rbp,%r13

	mulxq 16+128(%rsi),%rcx,%rbp
	adcxq %rcx,%r13
	adoxq %rbp,%r8

	mulxq 24+128(%rsi),%rcx,%rbp
	movq %r11,%rdx
	adcxq %rcx,%r8
	shlxq %r14,%r11,%rcx
	adoxq %rbp,%r9
	shrxq %r14,%r11,%rbp

	adcxq %r10,%r9
	adoxq %r10,%r10
	adcq $0,%r10



	addq %rcx,%r12
	adcq %rbp,%r13

	mulxq %r15,%rcx,%rbp
	movq %r12,%rbx
	movq .Lpoly+8(%rip),%r14
	adcq %rcx,%r8
	movq %r13,%rdx
	adcq %rbp,%r9
	adcq $0,%r10



	xorl %eax,%eax
	movq %r8,%rcx
	sbbq $-1,%r12
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%rbp
	sbbq %r15,%r9
	sbbq $0,%r10

	cmovcq %rbx,%r12
	cmovcq %rdx,%r13
	movq %r12,0(%rdi)
	cmovcq %rcx,%r8
	movq %r13,8(%rdi)
	cmovcq %rbp,%r9
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx

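# MULX/ADX worker for the field squaring. On entry %rdx holds a[0],
# %r14,%r15,%r8 hold a[1..3], and %rsi points 128 bytes below a.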
.type __ecp_nistz256_sqr_montx,@function
.align 32
__ecp_nistz256_sqr_montx:
.cfi_startproc
	mulxq %r14,%r9,%r10
	mulxq %r15,%rcx,%r11
	xorl %eax,%eax
	adcq %rcx,%r10
	mulxq %r8,%rbp,%r12
	movq %r14,%rdx
	adcq %rbp,%r11
	adcq $0,%r12
	xorq %r13,%r13


	mulxq %r15,%rcx,%rbp
	adcxq %rcx,%r11
	adoxq %rbp,%r12

	mulxq %r8,%rcx,%rbp
	movq %r15,%rdx
	adcxq %rcx,%r12
	adoxq %rbp,%r13
	adcq $0,%r13


	mulxq %r8,%rcx,%r14
	movq 0+128(%rsi),%rdx
	xorq %r15,%r15
	adcxq %r9,%r9
	adoxq %rcx,%r13
	adcxq %r10,%r10
	adoxq %r15,%r14

	mulxq %rdx,%r8,%rbp
	movq 8+128(%rsi),%rdx
	adcxq %r11,%r11
	adoxq %rbp,%r9
	adcxq %r12,%r12
	mulxq %rdx,%rcx,%rax
	movq 16+128(%rsi),%rdx
	adcxq %r13,%r13
	adoxq %rcx,%r10
	adcxq %r14,%r14
.byte 0x67
	mulxq %rdx,%rcx,%rbp
	movq 24+128(%rsi),%rdx
	adoxq %rax,%r11
	adcxq %r15,%r15
	adoxq %rcx,%r12
	movq $32,%rsi
	adoxq %rbp,%r13
.byte 0x67,0x67
	mulxq %rdx,%rcx,%rax
	movq .Lpoly+24(%rip),%rdx
	adoxq %rcx,%r14
	shlxq %rsi,%r8,%rcx
	adoxq %rax,%r15
	shrxq %rsi,%r8,%rax
	movq %rdx,%rbp


	addq %rcx,%r9
	adcq %rax,%r10

	mulxq %r8,%rcx,%r8
	adcq %rcx,%r11
	shlxq %rsi,%r9,%rcx
	adcq $0,%r8
	shrxq %rsi,%r9,%rax


	addq %rcx,%r10
	adcq %rax,%r11

	mulxq %r9,%rcx,%r9
	adcq %rcx,%r8
	shlxq %rsi,%r10,%rcx
	adcq $0,%r9
	shrxq %rsi,%r10,%rax


	addq %rcx,%r11
	adcq %rax,%r8

	mulxq %r10,%rcx,%r10
	adcq %rcx,%r9
	shlxq %rsi,%r11,%rcx
	adcq $0,%r10
	shrxq %rsi,%r11,%rax


	addq %rcx,%r8
	adcq %rax,%r9

	mulxq %r11,%rcx,%r11
	adcq %rcx,%r10
	adcq $0,%r11

	xorq %rdx,%rdx
	addq %r8,%r12
	movq .Lpoly+8(%rip),%rsi
	adcq %r9,%r13
	movq %r12,%r8
	adcq %r10,%r14
	adcq %r11,%r15
	movq %r13,%r9
	adcq $0,%rdx

	subq $-1,%r12
	movq %r14,%r10
	sbbq %rsi,%r13
	sbbq $0,%r14
	movq %r15,%r11
	sbbq %rbp,%r15
	sbbq $0,%rdx

	cmovcq %r8,%r12
	cmovcq %r9,%r13
	movq %r12,0(%rdi)
	cmovcq %r10,%r14
	movq %r13,8(%rdi)
	cmovcq %r11,%r15
	movq %r14,16(%rdi)
	movq %r15,24(%rdi)

	.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx


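# void ecp_nistz256_select_w5(P256_POINT *val, P256_POINT in_t[16], int index);
# Constant-time lookup: reads all 16 table entries and masks in the one
# matching index, so the memory access pattern is independent of the secret
# index (index 0 matches nothing and yields all zeros). Dispatches to the
# AVX2 variant when the AVX2 capability bit (mask $32) is set.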
.globl ecp_nistz256_select_w5
.hidden ecp_nistz256_select_w5
.type ecp_nistz256_select_w5,@function
.align 32
ecp_nistz256_select_w5:
.cfi_startproc
	leaq OPENSSL_ia32cap_P(%rip),%rax
	movq 8(%rax),%rax
	testl $32,%eax
	jnz .Lavx2_select_w5
	movdqa .LOne(%rip),%xmm0
	movd %edx,%xmm1

	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	pxor %xmm6,%xmm6
	pxor %xmm7,%xmm7

	movdqa %xmm0,%xmm8
	pshufd $0,%xmm1,%xmm1

	movq $16,%rax
.Lselect_loop_sse_w5:

	movdqa %xmm8,%xmm15
	paddd %xmm0,%xmm8
	pcmpeqd %xmm1,%xmm15

	movdqa 0(%rsi),%xmm9
	movdqa 16(%rsi),%xmm10
	movdqa 32(%rsi),%xmm11
	movdqa 48(%rsi),%xmm12
	movdqa 64(%rsi),%xmm13
	movdqa 80(%rsi),%xmm14
	leaq 96(%rsi),%rsi

	pand %xmm15,%xmm9
	pand %xmm15,%xmm10
	por %xmm9,%xmm2
	pand %xmm15,%xmm11
	por %xmm10,%xmm3
	pand %xmm15,%xmm12
	por %xmm11,%xmm4
	pand %xmm15,%xmm13
	por %xmm12,%xmm5
	pand %xmm15,%xmm14
	por %xmm13,%xmm6
	por %xmm14,%xmm7

	decq %rax
	jnz .Lselect_loop_sse_w5

	movdqu %xmm2,0(%rdi)
	movdqu %xmm3,16(%rdi)
	movdqu %xmm4,32(%rdi)
	movdqu %xmm5,48(%rdi)
	movdqu %xmm6,64(%rdi)
	movdqu %xmm7,80(%rdi)
	.byte 0xf3,0xc3
.cfi_endproc
.LSEH_end_ecp_nistz256_select_w5:
.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5


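# void ecp_nistz256_select_w7(P256_POINT_AFFINE *val,
#                             P256_POINT_AFFINE in_t[64], int index);
# Same constant-time scan, over 64 affine (x,y) entries of 64 bytes each.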
.globl ecp_nistz256_select_w7
.hidden ecp_nistz256_select_w7
.type ecp_nistz256_select_w7,@function
.align 32
ecp_nistz256_select_w7:
.cfi_startproc
	leaq OPENSSL_ia32cap_P(%rip),%rax
	movq 8(%rax),%rax
	testl $32,%eax
	jnz .Lavx2_select_w7
	movdqa .LOne(%rip),%xmm8
	movd %edx,%xmm1

	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5

	movdqa %xmm8,%xmm0
	pshufd $0,%xmm1,%xmm1
	movq $64,%rax

.Lselect_loop_sse_w7:
	movdqa %xmm8,%xmm15
	paddd %xmm0,%xmm8
	movdqa 0(%rsi),%xmm9
	movdqa 16(%rsi),%xmm10
	pcmpeqd %xmm1,%xmm15
	movdqa 32(%rsi),%xmm11
	movdqa 48(%rsi),%xmm12
	leaq 64(%rsi),%rsi

	pand %xmm15,%xmm9
	pand %xmm15,%xmm10
	por %xmm9,%xmm2
	pand %xmm15,%xmm11
	por %xmm10,%xmm3
	pand %xmm15,%xmm12
	por %xmm11,%xmm4
	prefetcht0 255(%rsi)
	por %xmm12,%xmm5

	decq %rax
	jnz .Lselect_loop_sse_w7

	movdqu %xmm2,0(%rdi)
	movdqu %xmm3,16(%rdi)
	movdqu %xmm4,32(%rdi)
	movdqu %xmm5,48(%rdi)
	.byte 0xf3,0xc3
.cfi_endproc
.LSEH_end_ecp_nistz256_select_w7:
.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7

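# AVX2 version of ecp_nistz256_select_w5: two comparison lanes walk the
# 16 entries in 8 iterations of 192 bytes each.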
.type ecp_nistz256_avx2_select_w5,@function
.align 32
ecp_nistz256_avx2_select_w5:
.cfi_startproc
.Lavx2_select_w5:
	vzeroupper
	vmovdqa .LTwo(%rip),%ymm0

	vpxor %ymm2,%ymm2,%ymm2
	vpxor %ymm3,%ymm3,%ymm3
	vpxor %ymm4,%ymm4,%ymm4

	vmovdqa .LOne(%rip),%ymm5
	vmovdqa .LTwo(%rip),%ymm10

	vmovd %edx,%xmm1
	vpermd %ymm1,%ymm2,%ymm1

	movq $8,%rax
.Lselect_loop_avx2_w5:

	vmovdqa 0(%rsi),%ymm6
	vmovdqa 32(%rsi),%ymm7
	vmovdqa 64(%rsi),%ymm8

	vmovdqa 96(%rsi),%ymm11
	vmovdqa 128(%rsi),%ymm12
	vmovdqa 160(%rsi),%ymm13

	vpcmpeqd %ymm1,%ymm5,%ymm9
	vpcmpeqd %ymm1,%ymm10,%ymm14

	vpaddd %ymm0,%ymm5,%ymm5
	vpaddd %ymm0,%ymm10,%ymm10
	leaq 192(%rsi),%rsi

	vpand %ymm9,%ymm6,%ymm6
	vpand %ymm9,%ymm7,%ymm7
	vpand %ymm9,%ymm8,%ymm8
	vpand %ymm14,%ymm11,%ymm11
	vpand %ymm14,%ymm12,%ymm12
	vpand %ymm14,%ymm13,%ymm13

	vpxor %ymm6,%ymm2,%ymm2
	vpxor %ymm7,%ymm3,%ymm3
	vpxor %ymm8,%ymm4,%ymm4
	vpxor %ymm11,%ymm2,%ymm2
	vpxor %ymm12,%ymm3,%ymm3
	vpxor %ymm13,%ymm4,%ymm4

	decq %rax
	jnz .Lselect_loop_avx2_w5

	vmovdqu %ymm2,0(%rdi)
	vmovdqu %ymm3,32(%rdi)
	vmovdqu %ymm4,64(%rdi)
	vzeroupper
	.byte 0xf3,0xc3
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w5:
.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5



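# AVX2 version of ecp_nistz256_select_w7: three comparison lanes cover 63 of
# the 64 entries in 21 iterations, with the final entry handled after the loop.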
.globl ecp_nistz256_avx2_select_w7
.hidden ecp_nistz256_avx2_select_w7
.type ecp_nistz256_avx2_select_w7,@function
.align 32
ecp_nistz256_avx2_select_w7:
.cfi_startproc
.Lavx2_select_w7:
	vzeroupper
	vmovdqa .LThree(%rip),%ymm0

	vpxor %ymm2,%ymm2,%ymm2
	vpxor %ymm3,%ymm3,%ymm3

	vmovdqa .LOne(%rip),%ymm4
	vmovdqa .LTwo(%rip),%ymm8
	vmovdqa .LThree(%rip),%ymm12

	vmovd %edx,%xmm1
	vpermd %ymm1,%ymm2,%ymm1


	movq $21,%rax
.Lselect_loop_avx2_w7:

	vmovdqa 0(%rsi),%ymm5
	vmovdqa 32(%rsi),%ymm6

	vmovdqa 64(%rsi),%ymm9
	vmovdqa 96(%rsi),%ymm10

	vmovdqa 128(%rsi),%ymm13
	vmovdqa 160(%rsi),%ymm14

	vpcmpeqd %ymm1,%ymm4,%ymm7
	vpcmpeqd %ymm1,%ymm8,%ymm11
	vpcmpeqd %ymm1,%ymm12,%ymm15

	vpaddd %ymm0,%ymm4,%ymm4
	vpaddd %ymm0,%ymm8,%ymm8
	vpaddd %ymm0,%ymm12,%ymm12
	leaq 192(%rsi),%rsi

	vpand %ymm7,%ymm5,%ymm5
	vpand %ymm7,%ymm6,%ymm6
	vpand %ymm11,%ymm9,%ymm9
	vpand %ymm11,%ymm10,%ymm10
	vpand %ymm15,%ymm13,%ymm13
	vpand %ymm15,%ymm14,%ymm14

	vpxor %ymm5,%ymm2,%ymm2
	vpxor %ymm6,%ymm3,%ymm3
	vpxor %ymm9,%ymm2,%ymm2
	vpxor %ymm10,%ymm3,%ymm3
	vpxor %ymm13,%ymm2,%ymm2
	vpxor %ymm14,%ymm3,%ymm3

	decq %rax
	jnz .Lselect_loop_avx2_w7


	vmovdqa 0(%rsi),%ymm5
	vmovdqa 32(%rsi),%ymm6

	vpcmpeqd %ymm1,%ymm4,%ymm7

	vpand %ymm7,%ymm5,%ymm5
	vpand %ymm7,%ymm6,%ymm6

	vpxor %ymm5,%ymm2,%ymm2
	vpxor %ymm6,%ymm3,%ymm3

	vmovdqu %ymm2,0(%rdi)
	vmovdqu %ymm3,32(%rdi)
	vzeroupper
	.byte 0xf3,0xc3
.cfi_endproc
.LSEH_end_ecp_nistz256_avx2_select_w7:
.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
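# Field helpers used by the point routines. Operand limbs arrive in
# %r12,%r13,%r8,%r9, with %r14/%r15 caching .Lpoly+8/.Lpoly+24.
# __ecp_nistz256_add_toq: res = a + (%rbx) mod p, stored to (%rdi).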
.type __ecp_nistz256_add_toq,@function
.align 32
__ecp_nistz256_add_toq:
.cfi_startproc
	xorq %r11,%r11
	addq 0(%rbx),%r12
	adcq 8(%rbx),%r13
	movq %r12,%rax
	adcq 16(%rbx),%r8
	adcq 24(%rbx),%r9
	movq %r13,%rbp
	adcq $0,%r11

	subq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	cmovcq %rbp,%r13
	movq %r12,0(%rdi)
	cmovcq %rcx,%r8
	movq %r13,8(%rdi)
	cmovcq %r10,%r9
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq

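# __ecp_nistz256_sub_fromq: res = a - (%rbx) mod p, stored to (%rdi).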
.type __ecp_nistz256_sub_fromq,@function
.align 32
__ecp_nistz256_sub_fromq:
.cfi_startproc
	subq 0(%rbx),%r12
	sbbq 8(%rbx),%r13
	movq %r12,%rax
	sbbq 16(%rbx),%r8
	sbbq 24(%rbx),%r9
	movq %r13,%rbp
	sbbq %r11,%r11

	addq $-1,%r12
	movq %r8,%rcx
	adcq %r14,%r13
	adcq $0,%r8
	movq %r9,%r10
	adcq %r15,%r9
	testq %r11,%r11

	cmovzq %rax,%r12
	cmovzq %rbp,%r13
	movq %r12,0(%rdi)
	cmovzq %rcx,%r8
	movq %r13,8(%rdi)
	cmovzq %r10,%r9
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq

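# __ecp_nistz256_subq: register-only subtraction; returns
# (%rax,%rbp,%rcx,%r10) - (%r12,%r13,%r8,%r9) mod p in %r12,%r13,%r8,%r9.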
.type __ecp_nistz256_subq,@function
.align 32
__ecp_nistz256_subq:
.cfi_startproc
	subq %r12,%rax
	sbbq %r13,%rbp
	movq %rax,%r12
	sbbq %r8,%rcx
	sbbq %r9,%r10
	movq %rbp,%r13
	sbbq %r11,%r11

	addq $-1,%rax
	movq %rcx,%r8
	adcq %r14,%rbp
	adcq $0,%rcx
	movq %r10,%r9
	adcq %r15,%r10
	testq %r11,%r11

	cmovnzq %rax,%r12
	cmovnzq %rbp,%r13
	cmovnzq %rcx,%r8
	cmovnzq %r10,%r9

	.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_subq,.-__ecp_nistz256_subq

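# __ecp_nistz256_mul_by_2q: res = 2*a mod p, stored to (%rdi).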
.type __ecp_nistz256_mul_by_2q,@function
.align 32
__ecp_nistz256_mul_by_2q:
.cfi_startproc
	xorq %r11,%r11
	addq %r12,%r12
	adcq %r13,%r13
	movq %r12,%rax
	adcq %r8,%r8
	adcq %r9,%r9
	movq %r13,%rbp
	adcq $0,%r11

	subq $-1,%r12
	movq %r8,%rcx
	sbbq %r14,%r13
	sbbq $0,%r8
	movq %r9,%r10
	sbbq %r15,%r9
	sbbq $0,%r11

	cmovcq %rax,%r12
	cmovcq %rbp,%r13
	movq %r12,0(%rdi)
	cmovcq %rcx,%r8
	movq %r13,8(%rdi)
	cmovcq %r10,%r9
	movq %r8,16(%rdi)
	movq %r9,24(%rdi)

	.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
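# void ecp_nistz256_point_double(P256_POINT *r, P256_POINT *a);
# Point doubling in Jacobian coordinates, on a 160-byte stack frame. The
# mid-routine halving of an odd value is done branch-free: conditionally
# add p (cmov on the low bit), then shift the 256-bit value right by one.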
.globl ecp_nistz256_point_double
.hidden ecp_nistz256_point_double
.type ecp_nistz256_point_double,@function
.align 32
ecp_nistz256_point_double:
.cfi_startproc
	leaq OPENSSL_ia32cap_P(%rip),%rcx
	movq 8(%rcx),%rcx
	andl $0x80100,%ecx
	cmpl $0x80100,%ecx
	je .Lpoint_doublex
	pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
	pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
	pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
	pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
	pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
	pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
	subq $160+8,%rsp
.cfi_adjust_cfa_offset 32*5+8
.Lpoint_doubleq_body:

.Lpoint_double_shortcutq:
	movdqu 0(%rsi),%xmm0
	movq %rsi,%rbx
	movdqu 16(%rsi),%xmm1
	movq 32+0(%rsi),%r12
	movq 32+8(%rsi),%r13
	movq 32+16(%rsi),%r8
	movq 32+24(%rsi),%r9
	movq .Lpoly+8(%rip),%r14
	movq .Lpoly+24(%rip),%r15
	movdqa %xmm0,96(%rsp)
	movdqa %xmm1,96+16(%rsp)
	leaq 32(%rdi),%r10
	leaq 64(%rdi),%r11
.byte 102,72,15,110,199
.byte 102,73,15,110,202
.byte 102,73,15,110,211

	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_by_2q

	movq 64+0(%rsi),%rax
	movq 64+8(%rsi),%r14
	movq 64+16(%rsi),%r15
	movq 64+24(%rsi),%r8
	leaq 64-0(%rsi),%rsi
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sqr_montq

	movq 0+0(%rsp),%rax
	movq 8+0(%rsp),%r14
	leaq 0+0(%rsp),%rsi
	movq 16+0(%rsp),%r15
	movq 24+0(%rsp),%r8
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_sqr_montq

	movq 32(%rbx),%rax
	movq 64+0(%rbx),%r9
	movq 64+8(%rbx),%r10
	movq 64+16(%rbx),%r11
	movq 64+24(%rbx),%r12
	leaq 64-0(%rbx),%rsi
	leaq 32(%rbx),%rbx
.byte 102,72,15,126,215
	call __ecp_nistz256_mul_montq
	call __ecp_nistz256_mul_by_2q

	movq 96+0(%rsp),%r12
	movq 96+8(%rsp),%r13
	leaq 64(%rsp),%rbx
	movq 96+16(%rsp),%r8
	movq 96+24(%rsp),%r9
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_add_toq

	movq 96+0(%rsp),%r12
	movq 96+8(%rsp),%r13
	leaq 64(%rsp),%rbx
	movq 96+16(%rsp),%r8
	movq 96+24(%rsp),%r9
	leaq 64(%rsp),%rdi
	call __ecp_nistz256_sub_fromq

	movq 0+0(%rsp),%rax
	movq 8+0(%rsp),%r14
	leaq 0+0(%rsp),%rsi
	movq 16+0(%rsp),%r15
	movq 24+0(%rsp),%r8
.byte 102,72,15,126,207
	call __ecp_nistz256_sqr_montq
	xorq %r9,%r9
	movq %r12,%rax
	addq $-1,%r12
	movq %r13,%r10
	adcq %rsi,%r13
	movq %r14,%rcx
	adcq $0,%r14
	movq %r15,%r8
	adcq %rbp,%r15
	adcq $0,%r9
	xorq %rsi,%rsi
	testq $1,%rax

	cmovzq %rax,%r12
	cmovzq %r10,%r13
	cmovzq %rcx,%r14
	cmovzq %r8,%r15
	cmovzq %rsi,%r9

	movq %r13,%rax
	shrq $1,%r12
	shlq $63,%rax
	movq %r14,%r10
	shrq $1,%r13
	orq %rax,%r12
	shlq $63,%r10
	movq %r15,%rcx
	shrq $1,%r14
	orq %r10,%r13
	shlq $63,%rcx
	movq %r12,0(%rdi)
	shrq $1,%r15
	movq %r13,8(%rdi)
	shlq $63,%r9
	orq %rcx,%r14
	orq %r9,%r15
	movq %r14,16(%rdi)
	movq %r15,24(%rdi)
	movq 64(%rsp),%rax
	leaq 64(%rsp),%rbx
	movq 0+32(%rsp),%r9
	movq 8+32(%rsp),%r10
	leaq 0+32(%rsp),%rsi
	movq 16+32(%rsp),%r11
	movq 24+32(%rsp),%r12
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	leaq 128(%rsp),%rdi
	call __ecp_nistz256_mul_by_2q

	leaq 32(%rsp),%rbx
	leaq 32(%rsp),%rdi
	call __ecp_nistz256_add_toq

	movq 96(%rsp),%rax
	leaq 96(%rsp),%rbx
	movq 0+0(%rsp),%r9
	movq 8+0(%rsp),%r10
	leaq 0+0(%rsp),%rsi
	movq 16+0(%rsp),%r11
	movq 24+0(%rsp),%r12
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montq

	leaq 128(%rsp),%rdi
	call __ecp_nistz256_mul_by_2q

	movq 0+32(%rsp),%rax
	movq 8+32(%rsp),%r14
	leaq 0+32(%rsp),%rsi
	movq 16+32(%rsp),%r15
	movq 24+32(%rsp),%r8
.byte 102,72,15,126,199
	call __ecp_nistz256_sqr_montq

	leaq 128(%rsp),%rbx
	movq %r14,%r8
	movq %r15,%r9
	movq %rsi,%r14
	movq %rbp,%r15
	call __ecp_nistz256_sub_fromq

	movq 0+0(%rsp),%rax
	movq 0+8(%rsp),%rbp
	movq 0+16(%rsp),%rcx
	movq 0+24(%rsp),%r10
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_subq

	movq 32(%rsp),%rax
	leaq 32(%rsp),%rbx
	movq %r12,%r14
	xorl %ecx,%ecx
	movq %r12,0+0(%rsp)
	movq %r13,%r10
	movq %r13,0+8(%rsp)
	cmovzq %r8,%r11
	movq %r8,0+16(%rsp)
	leaq 0-0(%rsp),%rsi
	cmovzq %r9,%r12
	movq %r9,0+24(%rsp)
	movq %r14,%r9
	leaq 0(%rsp),%rdi
	call __ecp_nistz256_mul_montq

.byte 102,72,15,126,203
.byte 102,72,15,126,207
	call __ecp_nistz256_sub_fromq

	leaq 160+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbx
.cfi_restore %rbx
	movq -8(%rsi),%rbp
.cfi_restore %rbp
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpoint_doubleq_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_point_double,.-ecp_nistz256_point_double
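# void ecp_nistz256_point_add(P256_POINT *r, P256_POINT *a, P256_POINT *b);
# Full Jacobian point addition on a 576-byte stack frame. Inputs at infinity
# are masked in at the end; a == b is detected and branches to
# .Lpoint_double_shortcutq, and a == -b produces the point at infinity.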
2649.globl ecp_nistz256_point_add
2650.hidden ecp_nistz256_point_add
2651.type ecp_nistz256_point_add,@function
2652.align 32
2653ecp_nistz256_point_add:
2654.cfi_startproc
2655 leaq OPENSSL_ia32cap_P(%rip),%rcx
2656 movq 8(%rcx),%rcx
2657 andl $0x80100,%ecx
2658 cmpl $0x80100,%ecx
2659 je .Lpoint_addx
2660 pushq %rbp
2661.cfi_adjust_cfa_offset 8
2662.cfi_offset %rbp,-16
2663 pushq %rbx
2664.cfi_adjust_cfa_offset 8
2665.cfi_offset %rbx,-24
2666 pushq %r12
2667.cfi_adjust_cfa_offset 8
2668.cfi_offset %r12,-32
2669 pushq %r13
2670.cfi_adjust_cfa_offset 8
2671.cfi_offset %r13,-40
2672 pushq %r14
2673.cfi_adjust_cfa_offset 8
2674.cfi_offset %r14,-48
2675 pushq %r15
2676.cfi_adjust_cfa_offset 8
2677.cfi_offset %r15,-56
2678 subq $576+8,%rsp
2679.cfi_adjust_cfa_offset 32*18+8
2680.Lpoint_addq_body:
2681
2682 movdqu 0(%rsi),%xmm0
2683 movdqu 16(%rsi),%xmm1
2684 movdqu 32(%rsi),%xmm2
2685 movdqu 48(%rsi),%xmm3
2686 movdqu 64(%rsi),%xmm4
2687 movdqu 80(%rsi),%xmm5
2688 movq %rsi,%rbx
2689 movq %rdx,%rsi
2690 movdqa %xmm0,384(%rsp)
2691 movdqa %xmm1,384+16(%rsp)
2692 movdqa %xmm2,416(%rsp)
2693 movdqa %xmm3,416+16(%rsp)
2694 movdqa %xmm4,448(%rsp)
2695 movdqa %xmm5,448+16(%rsp)
2696 por %xmm4,%xmm5
2697
2698 movdqu 0(%rsi),%xmm0
2699 pshufd $0xb1,%xmm5,%xmm3
2700 movdqu 16(%rsi),%xmm1
2701 movdqu 32(%rsi),%xmm2
2702 por %xmm3,%xmm5
2703 movdqu 48(%rsi),%xmm3
2704 movq 64+0(%rsi),%rax
2705 movq 64+8(%rsi),%r14
2706 movq 64+16(%rsi),%r15
2707 movq 64+24(%rsi),%r8
2708 movdqa %xmm0,480(%rsp)
2709 pshufd $0x1e,%xmm5,%xmm4
2710 movdqa %xmm1,480+16(%rsp)
2711 movdqu 64(%rsi),%xmm0
2712 movdqu 80(%rsi),%xmm1
2713 movdqa %xmm2,512(%rsp)
2714 movdqa %xmm3,512+16(%rsp)
2715 por %xmm4,%xmm5
2716 pxor %xmm4,%xmm4
2717 por %xmm0,%xmm1
2718.byte 102,72,15,110,199
2719
2720 leaq 64-0(%rsi),%rsi
2721 movq %rax,544+0(%rsp)
2722 movq %r14,544+8(%rsp)
2723 movq %r15,544+16(%rsp)
2724 movq %r8,544+24(%rsp)
2725 leaq 96(%rsp),%rdi
2726 call __ecp_nistz256_sqr_montq
2727
2728 pcmpeqd %xmm4,%xmm5
2729 pshufd $0xb1,%xmm1,%xmm4
2730 por %xmm1,%xmm4
2731 pshufd $0,%xmm5,%xmm5
2732 pshufd $0x1e,%xmm4,%xmm3
2733 por %xmm3,%xmm4
2734 pxor %xmm3,%xmm3
2735 pcmpeqd %xmm3,%xmm4
2736 pshufd $0,%xmm4,%xmm4
2737 movq 64+0(%rbx),%rax
2738 movq 64+8(%rbx),%r14
2739 movq 64+16(%rbx),%r15
2740 movq 64+24(%rbx),%r8
2741.byte 102,72,15,110,203
2742
2743 leaq 64-0(%rbx),%rsi
2744 leaq 32(%rsp),%rdi
2745 call __ecp_nistz256_sqr_montq
2746
2747 movq 544(%rsp),%rax
2748 leaq 544(%rsp),%rbx
2749 movq 0+96(%rsp),%r9
2750 movq 8+96(%rsp),%r10
2751 leaq 0+96(%rsp),%rsi
2752 movq 16+96(%rsp),%r11
2753 movq 24+96(%rsp),%r12
2754 leaq 224(%rsp),%rdi
2755 call __ecp_nistz256_mul_montq
2756
2757 movq 448(%rsp),%rax
2758 leaq 448(%rsp),%rbx
2759 movq 0+32(%rsp),%r9
2760 movq 8+32(%rsp),%r10
2761 leaq 0+32(%rsp),%rsi
2762 movq 16+32(%rsp),%r11
2763 movq 24+32(%rsp),%r12
2764 leaq 256(%rsp),%rdi
2765 call __ecp_nistz256_mul_montq
2766
2767 movq 416(%rsp),%rax
2768 leaq 416(%rsp),%rbx
2769 movq 0+224(%rsp),%r9
2770 movq 8+224(%rsp),%r10
2771 leaq 0+224(%rsp),%rsi
2772 movq 16+224(%rsp),%r11
2773 movq 24+224(%rsp),%r12
2774 leaq 224(%rsp),%rdi
2775 call __ecp_nistz256_mul_montq
2776
2777 movq 512(%rsp),%rax
2778 leaq 512(%rsp),%rbx
2779 movq 0+256(%rsp),%r9
2780 movq 8+256(%rsp),%r10
2781 leaq 0+256(%rsp),%rsi
2782 movq 16+256(%rsp),%r11
2783 movq 24+256(%rsp),%r12
2784 leaq 256(%rsp),%rdi
2785 call __ecp_nistz256_mul_montq
2786
2787 leaq 224(%rsp),%rbx
2788 leaq 64(%rsp),%rdi
2789 call __ecp_nistz256_sub_fromq
2790
2791 orq %r13,%r12
2792 movdqa %xmm4,%xmm2
2793 orq %r8,%r12
2794 orq %r9,%r12
2795 por %xmm5,%xmm2
2796.byte 102,73,15,110,220
2797
2798 movq 384(%rsp),%rax
2799 leaq 384(%rsp),%rbx
2800 movq 0+96(%rsp),%r9
2801 movq 8+96(%rsp),%r10
2802 leaq 0+96(%rsp),%rsi
2803 movq 16+96(%rsp),%r11
2804 movq 24+96(%rsp),%r12
2805 leaq 160(%rsp),%rdi
2806 call __ecp_nistz256_mul_montq
2807
2808 movq 480(%rsp),%rax
2809 leaq 480(%rsp),%rbx
2810 movq 0+32(%rsp),%r9
2811 movq 8+32(%rsp),%r10
2812 leaq 0+32(%rsp),%rsi
2813 movq 16+32(%rsp),%r11
2814 movq 24+32(%rsp),%r12
2815 leaq 192(%rsp),%rdi
2816 call __ecp_nistz256_mul_montq
2817
2818 leaq 160(%rsp),%rbx
2819 leaq 0(%rsp),%rdi
2820 call __ecp_nistz256_sub_fromq
2821
2822 orq %r13,%r12
2823 orq %r8,%r12
2824 orq %r9,%r12
2825
2826.byte 102,73,15,126,208
2827.byte 102,73,15,126,217
2828 orq %r8,%r12
2829.byte 0x3e
2830 jnz .Ladd_proceedq
2831
2832
2833
2834 testq %r9,%r9
2835 jz .Ladd_doubleq
2836
2837
2838
2839
2840
2841
2842.byte 102,72,15,126,199
2843 pxor %xmm0,%xmm0
2844 movdqu %xmm0,0(%rdi)
2845 movdqu %xmm0,16(%rdi)
2846 movdqu %xmm0,32(%rdi)
2847 movdqu %xmm0,48(%rdi)
2848 movdqu %xmm0,64(%rdi)
2849 movdqu %xmm0,80(%rdi)
2850 jmp .Ladd_doneq
2851
2852.align 32
2853.Ladd_doubleq:
2854.byte 102,72,15,126,206
2855.byte 102,72,15,126,199
2856 addq $416,%rsp
2857.cfi_adjust_cfa_offset -416
2858 jmp .Lpoint_double_shortcutq
2859.cfi_adjust_cfa_offset 416
2860
2861.align 32
2862.Ladd_proceedq:
 movq 0+64(%rsp),%rax
 movq 8+64(%rsp),%r14
 leaq 0+64(%rsp),%rsi
 movq 16+64(%rsp),%r15
 movq 24+64(%rsp),%r8
 leaq 96(%rsp),%rdi
 call __ecp_nistz256_sqr_montq

 movq 448(%rsp),%rax
 leaq 448(%rsp),%rbx
 movq 0+0(%rsp),%r9
 movq 8+0(%rsp),%r10
 leaq 0+0(%rsp),%rsi
 movq 16+0(%rsp),%r11
 movq 24+0(%rsp),%r12
 leaq 352(%rsp),%rdi
 call __ecp_nistz256_mul_montq

 movq 0+0(%rsp),%rax
 movq 8+0(%rsp),%r14
 leaq 0+0(%rsp),%rsi
 movq 16+0(%rsp),%r15
 movq 24+0(%rsp),%r8
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_sqr_montq

 movq 544(%rsp),%rax
 leaq 544(%rsp),%rbx
 movq 0+352(%rsp),%r9
 movq 8+352(%rsp),%r10
 leaq 0+352(%rsp),%rsi
 movq 16+352(%rsp),%r11
 movq 24+352(%rsp),%r12
 leaq 352(%rsp),%rdi
 call __ecp_nistz256_mul_montq

 movq 0(%rsp),%rax
 leaq 0(%rsp),%rbx
 movq 0+32(%rsp),%r9
 movq 8+32(%rsp),%r10
 leaq 0+32(%rsp),%rsi
 movq 16+32(%rsp),%r11
 movq 24+32(%rsp),%r12
 leaq 128(%rsp),%rdi
 call __ecp_nistz256_mul_montq

 movq 160(%rsp),%rax
 leaq 160(%rsp),%rbx
 movq 0+32(%rsp),%r9
 movq 8+32(%rsp),%r10
 leaq 0+32(%rsp),%rsi
 movq 16+32(%rsp),%r11
 movq 24+32(%rsp),%r12
 leaq 192(%rsp),%rdi
 call __ecp_nistz256_mul_montq




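/* assemble the result: res_x = Rsqr - Hcub - 2*U1*Hsqr -> 288,
   then res_y = R*(U1*Hsqr - res_x) - S1*Hcub -> 320 (res_z already sits at 352) */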
 xorq %r11,%r11
 addq %r12,%r12
 leaq 96(%rsp),%rsi
 adcq %r13,%r13
 movq %r12,%rax
 adcq %r8,%r8
 adcq %r9,%r9
 movq %r13,%rbp
 adcq $0,%r11

 subq $-1,%r12
 movq %r8,%rcx
 sbbq %r14,%r13
 sbbq $0,%r8
 movq %r9,%r10
 sbbq %r15,%r9
 sbbq $0,%r11

 cmovcq %rax,%r12
 movq 0(%rsi),%rax
 cmovcq %rbp,%r13
 movq 8(%rsi),%rbp
 cmovcq %rcx,%r8
 movq 16(%rsi),%rcx
 cmovcq %r10,%r9
 movq 24(%rsi),%r10

 call __ecp_nistz256_subq

 leaq 128(%rsp),%rbx
 leaq 288(%rsp),%rdi
 call __ecp_nistz256_sub_fromq

 movq 192+0(%rsp),%rax
 movq 192+8(%rsp),%rbp
 movq 192+16(%rsp),%rcx
 movq 192+24(%rsp),%r10
 leaq 320(%rsp),%rdi

 call __ecp_nistz256_subq

 movq %r12,0(%rdi)
 movq %r13,8(%rdi)
 movq %r8,16(%rdi)
 movq %r9,24(%rdi)
 movq 128(%rsp),%rax
 leaq 128(%rsp),%rbx
 movq 0+224(%rsp),%r9
 movq 8+224(%rsp),%r10
 leaq 0+224(%rsp),%rsi
 movq 16+224(%rsp),%r11
 movq 24+224(%rsp),%r12
 leaq 256(%rsp),%rdi
 call __ecp_nistz256_mul_montq

 movq 320(%rsp),%rax
 leaq 320(%rsp),%rbx
 movq 0+64(%rsp),%r9
 movq 8+64(%rsp),%r10
 leaq 0+64(%rsp),%rsi
 movq 16+64(%rsp),%r11
 movq 24+64(%rsp),%r12
 leaq 320(%rsp),%rdi
 call __ecp_nistz256_mul_montq

 leaq 256(%rsp),%rbx
 leaq 320(%rsp),%rdi
 call __ecp_nistz256_sub_fromq

.byte 102,72,15,126,199

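/* %rdi = out (restored from %xmm0); select each coordinate in constant time:
   the computed value, in2's if in1 was infinity, in1's if in2 was infinity */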
 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 352(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 352+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand 544(%rsp),%xmm2
 pand 544+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 448(%rsp),%xmm2
 pand 448+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,64(%rdi)
 movdqu %xmm3,80(%rdi)

 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 288(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 288+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand 480(%rsp),%xmm2
 pand 480+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 384(%rsp),%xmm2
 pand 384+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,0(%rdi)
 movdqu %xmm3,16(%rdi)

 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 320(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 320+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand 512(%rsp),%xmm2
 pand 512+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 416(%rsp),%xmm2
 pand 416+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,32(%rdi)
 movdqu %xmm3,48(%rdi)

.Ladd_doneq:
 leaq 576+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
 movq -48(%rsi),%r15
.cfi_restore %r15
 movq -40(%rsi),%r14
.cfi_restore %r14
 movq -32(%rsi),%r13
.cfi_restore %r13
 movq -24(%rsi),%r12
.cfi_restore %r12
 movq -16(%rsi),%rbx
.cfi_restore %rbx
 movq -8(%rsi),%rbp
.cfi_restore %rbp
 leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpoint_addq_epilogue:
 .byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_point_add,.-ecp_nistz256_point_add
.globl ecp_nistz256_point_add_affine
.hidden ecp_nistz256_point_add_affine
.type ecp_nistz256_point_add_affine,@function
.align 32
ecp_nistz256_point_add_affine:
.cfi_startproc
 leaq OPENSSL_ia32cap_P(%rip),%rcx
 movq 8(%rcx),%rcx
 andl $0x80100,%ecx
 cmpl $0x80100,%ecx
 je .Lpoint_add_affinex
 pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
 pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
 pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
 pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
 pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
 pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
 subq $480+8,%rsp
.cfi_adjust_cfa_offset 32*15+8
.Ladd_affineq_body:

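/* in1 is a full Jacobian point (x,y,z -> 320/352/384), in2 is affine (x,y -> 416/448);
   the in1infty (%xmm5) and in2infty (%xmm4) masks are built along the way */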
 movdqu 0(%rsi),%xmm0
 movq %rdx,%rbx
 movdqu 16(%rsi),%xmm1
 movdqu 32(%rsi),%xmm2
 movdqu 48(%rsi),%xmm3
 movdqu 64(%rsi),%xmm4
 movdqu 80(%rsi),%xmm5
 movq 64+0(%rsi),%rax
 movq 64+8(%rsi),%r14
 movq 64+16(%rsi),%r15
 movq 64+24(%rsi),%r8
 movdqa %xmm0,320(%rsp)
 movdqa %xmm1,320+16(%rsp)
 movdqa %xmm2,352(%rsp)
 movdqa %xmm3,352+16(%rsp)
 movdqa %xmm4,384(%rsp)
 movdqa %xmm5,384+16(%rsp)
 por %xmm4,%xmm5

 movdqu 0(%rbx),%xmm0
 pshufd $0xb1,%xmm5,%xmm3
 movdqu 16(%rbx),%xmm1
 movdqu 32(%rbx),%xmm2
 por %xmm3,%xmm5
 movdqu 48(%rbx),%xmm3
 movdqa %xmm0,416(%rsp)
 pshufd $0x1e,%xmm5,%xmm4
 movdqa %xmm1,416+16(%rsp)
 por %xmm0,%xmm1
.byte 102,72,15,110,199
 movdqa %xmm2,448(%rsp)
 movdqa %xmm3,448+16(%rsp)
 por %xmm2,%xmm3
 por %xmm4,%xmm5
 pxor %xmm4,%xmm4
 por %xmm1,%xmm3

 leaq 64-0(%rsi),%rsi
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_sqr_montq

 pcmpeqd %xmm4,%xmm5
 pshufd $0xb1,%xmm3,%xmm4
 movq 0(%rbx),%rax

 movq %r12,%r9
 por %xmm3,%xmm4
 pshufd $0,%xmm5,%xmm5
 pshufd $0x1e,%xmm4,%xmm3
 movq %r13,%r10
 por %xmm3,%xmm4
 pxor %xmm3,%xmm3
 movq %r14,%r11
 pcmpeqd %xmm3,%xmm4
 pshufd $0,%xmm4,%xmm4

 leaq 32-0(%rsp),%rsi
 movq %r15,%r12
 leaq 0(%rsp),%rdi
 call __ecp_nistz256_mul_montq

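/* U2 = in2_x * Z1sqr was just computed into 0(%rsp);
   H = U2 - in1_x -> 64(%rsp), since Z2 = 1 makes U1 = in1_x */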
 leaq 320(%rsp),%rbx
 leaq 64(%rsp),%rdi
 call __ecp_nistz256_sub_fromq

 movq 384(%rsp),%rax
 leaq 384(%rsp),%rbx
 movq 0+32(%rsp),%r9
 movq 8+32(%rsp),%r10
 leaq 0+32(%rsp),%rsi
 movq 16+32(%rsp),%r11
 movq 24+32(%rsp),%r12
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_mul_montq

 movq 384(%rsp),%rax
 leaq 384(%rsp),%rbx
 movq 0+64(%rsp),%r9
 movq 8+64(%rsp),%r10
 leaq 0+64(%rsp),%rsi
 movq 16+64(%rsp),%r11
 movq 24+64(%rsp),%r12
 leaq 288(%rsp),%rdi
 call __ecp_nistz256_mul_montq

 movq 448(%rsp),%rax
 leaq 448(%rsp),%rbx
 movq 0+32(%rsp),%r9
 movq 8+32(%rsp),%r10
 leaq 0+32(%rsp),%rsi
 movq 16+32(%rsp),%r11
 movq 24+32(%rsp),%r12
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_mul_montq

 leaq 352(%rsp),%rbx
 leaq 96(%rsp),%rdi
 call __ecp_nistz256_sub_fromq

 movq 0+64(%rsp),%rax
 movq 8+64(%rsp),%r14
 leaq 0+64(%rsp),%rsi
 movq 16+64(%rsp),%r15
 movq 24+64(%rsp),%r8
 leaq 128(%rsp),%rdi
 call __ecp_nistz256_sqr_montq

 movq 0+96(%rsp),%rax
 movq 8+96(%rsp),%r14
 leaq 0+96(%rsp),%rsi
 movq 16+96(%rsp),%r15
 movq 24+96(%rsp),%r8
 leaq 192(%rsp),%rdi
 call __ecp_nistz256_sqr_montq

 movq 128(%rsp),%rax
 leaq 128(%rsp),%rbx
 movq 0+64(%rsp),%r9
 movq 8+64(%rsp),%r10
 leaq 0+64(%rsp),%rsi
 movq 16+64(%rsp),%r11
 movq 24+64(%rsp),%r12
 leaq 160(%rsp),%rdi
 call __ecp_nistz256_mul_montq

 movq 320(%rsp),%rax
 leaq 320(%rsp),%rbx
 movq 0+128(%rsp),%r9
 movq 8+128(%rsp),%r10
 leaq 0+128(%rsp),%rsi
 movq 16+128(%rsp),%r11
 movq 24+128(%rsp),%r12
 leaq 0(%rsp),%rdi
 call __ecp_nistz256_mul_montq




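/* res_x = Rsqr - Hcub - 2*in1_x*Hsqr -> 224;
   res_y = R*(in1_x*Hsqr - res_x) - in1_y*Hcub -> 256 follows */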
 xorq %r11,%r11
 addq %r12,%r12
 leaq 192(%rsp),%rsi
 adcq %r13,%r13
 movq %r12,%rax
 adcq %r8,%r8
 adcq %r9,%r9
 movq %r13,%rbp
 adcq $0,%r11

 subq $-1,%r12
 movq %r8,%rcx
 sbbq %r14,%r13
 sbbq $0,%r8
 movq %r9,%r10
 sbbq %r15,%r9
 sbbq $0,%r11

 cmovcq %rax,%r12
 movq 0(%rsi),%rax
 cmovcq %rbp,%r13
 movq 8(%rsi),%rbp
 cmovcq %rcx,%r8
 movq 16(%rsi),%rcx
 cmovcq %r10,%r9
 movq 24(%rsi),%r10

 call __ecp_nistz256_subq

 leaq 160(%rsp),%rbx
 leaq 224(%rsp),%rdi
 call __ecp_nistz256_sub_fromq

 movq 0+0(%rsp),%rax
 movq 0+8(%rsp),%rbp
 movq 0+16(%rsp),%rcx
 movq 0+24(%rsp),%r10
 leaq 64(%rsp),%rdi

 call __ecp_nistz256_subq

 movq %r12,0(%rdi)
 movq %r13,8(%rdi)
 movq %r8,16(%rdi)
 movq %r9,24(%rdi)
 movq 352(%rsp),%rax
 leaq 352(%rsp),%rbx
 movq 0+160(%rsp),%r9
 movq 8+160(%rsp),%r10
 leaq 0+160(%rsp),%rsi
 movq 16+160(%rsp),%r11
 movq 24+160(%rsp),%r12
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_mul_montq

 movq 96(%rsp),%rax
 leaq 96(%rsp),%rbx
 movq 0+64(%rsp),%r9
 movq 8+64(%rsp),%r10
 leaq 0+64(%rsp),%rsi
 movq 16+64(%rsp),%r11
 movq 24+64(%rsp),%r12
 leaq 64(%rsp),%rdi
 call __ecp_nistz256_mul_montq

 leaq 32(%rsp),%rbx
 leaq 256(%rsp),%rdi
 call __ecp_nistz256_sub_fromq

.byte 102,72,15,126,199

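/* select output: the sum, in1 if in2 was infinity, or
   (in2_x, in2_y, ONE in Montgomery form) if in1 was infinity */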
 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 288(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 288+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand .LONE_mont(%rip),%xmm2
 pand .LONE_mont+16(%rip),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 384(%rsp),%xmm2
 pand 384+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,64(%rdi)
 movdqu %xmm3,80(%rdi)

 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 224(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 224+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand 416(%rsp),%xmm2
 pand 416+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 320(%rsp),%xmm2
 pand 320+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,0(%rdi)
 movdqu %xmm3,16(%rdi)

 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 256(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 256+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand 448(%rsp),%xmm2
 pand 448+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 352(%rsp),%xmm2
 pand 352+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,32(%rdi)
 movdqu %xmm3,48(%rdi)

 leaq 480+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
 movq -48(%rsi),%r15
.cfi_restore %r15
 movq -40(%rsi),%r14
.cfi_restore %r14
 movq -32(%rsi),%r13
.cfi_restore %r13
 movq -24(%rsi),%r12
.cfi_restore %r12
 movq -16(%rsi),%rbx
.cfi_restore %rbx
 movq -8(%rsi),%rbp
.cfi_restore %rbp
 leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Ladd_affineq_epilogue:
 .byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
.type __ecp_nistz256_add_tox,@function
.align 32
__ecp_nistz256_add_tox:
.cfi_startproc
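/* acc = (%r12,%r13,%r8,%r9); acc = (acc + [%rbx]) mod p. The xor clears CF so the
   adc chain is a plain add; the sbb chain subtracts p and the cmovs undo it on borrow */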
 xorq %r11,%r11
 adcq 0(%rbx),%r12
 adcq 8(%rbx),%r13
 movq %r12,%rax
 adcq 16(%rbx),%r8
 adcq 24(%rbx),%r9
 movq %r13,%rbp
 adcq $0,%r11

 xorq %r10,%r10
 sbbq $-1,%r12
 movq %r8,%rcx
 sbbq %r14,%r13
 sbbq $0,%r8
 movq %r9,%r10
 sbbq %r15,%r9
 sbbq $0,%r11

 cmovcq %rax,%r12
 cmovcq %rbp,%r13
 movq %r12,0(%rdi)
 cmovcq %rcx,%r8
 movq %r13,8(%rdi)
 cmovcq %r10,%r9
 movq %r8,16(%rdi)
 movq %r9,24(%rdi)

 .byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox

.type __ecp_nistz256_sub_fromx,@function
.align 32
__ecp_nistz256_sub_fromx:
.cfi_startproc
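/* acc = (acc - [%rbx]) mod p: subtract, add p back, and keep the corrected
   value only when the subtraction borrowed (bit 0 of %r11) */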
 xorq %r11,%r11
 sbbq 0(%rbx),%r12
 sbbq 8(%rbx),%r13
 movq %r12,%rax
 sbbq 16(%rbx),%r8
 sbbq 24(%rbx),%r9
 movq %r13,%rbp
 sbbq $0,%r11

 xorq %r10,%r10
 adcq $-1,%r12
 movq %r8,%rcx
 adcq %r14,%r13
 adcq $0,%r8
 movq %r9,%r10
 adcq %r15,%r9

 btq $0,%r11
 cmovncq %rax,%r12
 cmovncq %rbp,%r13
 movq %r12,0(%rdi)
 cmovncq %rcx,%r8
 movq %r13,8(%rdi)
 cmovncq %r10,%r9
 movq %r8,16(%rdi)
 movq %r9,24(%rdi)

 .byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx

.type __ecp_nistz256_subx,@function
.align 32
__ecp_nistz256_subx:
.cfi_startproc
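/* (%rax,%rbp,%rcx,%r10) - acc mod p, returned in %r12,%r13,%r8,%r9; nothing is stored */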
 xorq %r11,%r11
 sbbq %r12,%rax
 sbbq %r13,%rbp
 movq %rax,%r12
 sbbq %r8,%rcx
 sbbq %r9,%r10
 movq %rbp,%r13
 sbbq $0,%r11

 xorq %r9,%r9
 adcq $-1,%rax
 movq %rcx,%r8
 adcq %r14,%rbp
 adcq $0,%rcx
 movq %r10,%r9
 adcq %r15,%r10

 btq $0,%r11
 cmovcq %rax,%r12
 cmovcq %rbp,%r13
 cmovcq %rcx,%r8
 cmovcq %r10,%r9

 .byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_subx,.-__ecp_nistz256_subx

.type __ecp_nistz256_mul_by_2x,@function
.align 32
__ecp_nistz256_mul_by_2x:
.cfi_startproc
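/* acc = 2*acc mod p, stored to [%rdi] */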
 xorq %r11,%r11
 adcq %r12,%r12
 adcq %r13,%r13
 movq %r12,%rax
 adcq %r8,%r8
 adcq %r9,%r9
 movq %r13,%rbp
 adcq $0,%r11

 xorq %r10,%r10
 sbbq $-1,%r12
 movq %r8,%rcx
 sbbq %r14,%r13
 sbbq $0,%r8
 movq %r9,%r10
 sbbq %r15,%r9
 sbbq $0,%r11

 cmovcq %rax,%r12
 cmovcq %rbp,%r13
 movq %r12,0(%rdi)
 cmovcq %rcx,%r8
 movq %r13,8(%rdi)
 cmovcq %r10,%r9
 movq %r8,16(%rdi)
 movq %r9,24(%rdi)

 .byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
.type ecp_nistz256_point_doublex,@function
.align 32
ecp_nistz256_point_doublex:
.cfi_startproc
.Lpoint_doublex:
 pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
 pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
 pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
 pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
 pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
 pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
 subq $160+8,%rsp
.cfi_adjust_cfa_offset 32*5+8
.Lpoint_doublex_body:

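/* point doubling (mulx/adcx/adox flavor): S = 4*X*Y^2, M = 3*(X - Z^2)*(X + Z^2),
   res_x = M^2 - 2*S, res_y = M*(S - res_x) - 8*Y^4, res_z = 2*Y*Z */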
.Lpoint_double_shortcutx:
 movdqu 0(%rsi),%xmm0
 movq %rsi,%rbx
 movdqu 16(%rsi),%xmm1
 movq 32+0(%rsi),%r12
 movq 32+8(%rsi),%r13
 movq 32+16(%rsi),%r8
 movq 32+24(%rsi),%r9
 movq .Lpoly+8(%rip),%r14
 movq .Lpoly+24(%rip),%r15
 movdqa %xmm0,96(%rsp)
 movdqa %xmm1,96+16(%rsp)
 leaq 32(%rdi),%r10
 leaq 64(%rdi),%r11
.byte 102,72,15,110,199
.byte 102,73,15,110,202
.byte 102,73,15,110,211

 leaq 0(%rsp),%rdi
 call __ecp_nistz256_mul_by_2x

 movq 64+0(%rsi),%rdx
 movq 64+8(%rsi),%r14
 movq 64+16(%rsi),%r15
 movq 64+24(%rsi),%r8
 leaq 64-128(%rsi),%rsi
 leaq 64(%rsp),%rdi
 call __ecp_nistz256_sqr_montx

 movq 0+0(%rsp),%rdx
 movq 8+0(%rsp),%r14
 leaq -128+0(%rsp),%rsi
 movq 16+0(%rsp),%r15
 movq 24+0(%rsp),%r8
 leaq 0(%rsp),%rdi
 call __ecp_nistz256_sqr_montx

 movq 32(%rbx),%rdx
 movq 64+0(%rbx),%r9
 movq 64+8(%rbx),%r10
 movq 64+16(%rbx),%r11
 movq 64+24(%rbx),%r12
 leaq 64-128(%rbx),%rsi
 leaq 32(%rbx),%rbx
.byte 102,72,15,126,215
 call __ecp_nistz256_mul_montx
 call __ecp_nistz256_mul_by_2x

 movq 96+0(%rsp),%r12
 movq 96+8(%rsp),%r13
 leaq 64(%rsp),%rbx
 movq 96+16(%rsp),%r8
 movq 96+24(%rsp),%r9
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_add_tox

 movq 96+0(%rsp),%r12
 movq 96+8(%rsp),%r13
 leaq 64(%rsp),%rbx
 movq 96+16(%rsp),%r8
 movq 96+24(%rsp),%r9
 leaq 64(%rsp),%rdi
 call __ecp_nistz256_sub_fromx

 movq 0+0(%rsp),%rdx
 movq 8+0(%rsp),%r14
 leaq -128+0(%rsp),%rsi
 movq 16+0(%rsp),%r15
 movq 24+0(%rsp),%r8
.byte 102,72,15,126,207
 call __ecp_nistz256_sqr_montx
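/* res_y currently holds S^2 = 16*Y^4; halve it mod p
   (add p if the value is odd, then shift right one bit) to leave 8*Y^4 */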
 xorq %r9,%r9
 movq %r12,%rax
 addq $-1,%r12
 movq %r13,%r10
 adcq %rsi,%r13
 movq %r14,%rcx
 adcq $0,%r14
 movq %r15,%r8
 adcq %rbp,%r15
 adcq $0,%r9
 xorq %rsi,%rsi
 testq $1,%rax

 cmovzq %rax,%r12
 cmovzq %r10,%r13
 cmovzq %rcx,%r14
 cmovzq %r8,%r15
 cmovzq %rsi,%r9

 movq %r13,%rax
 shrq $1,%r12
 shlq $63,%rax
 movq %r14,%r10
 shrq $1,%r13
 orq %rax,%r12
 shlq $63,%r10
 movq %r15,%rcx
 shrq $1,%r14
 orq %r10,%r13
 shlq $63,%rcx
 movq %r12,0(%rdi)
 shrq $1,%r15
 movq %r13,8(%rdi)
 shlq $63,%r9
 orq %rcx,%r14
 orq %r9,%r15
 movq %r14,16(%rdi)
 movq %r15,24(%rdi)
 movq 64(%rsp),%rdx
 leaq 64(%rsp),%rbx
 movq 0+32(%rsp),%r9
 movq 8+32(%rsp),%r10
 leaq -128+32(%rsp),%rsi
 movq 16+32(%rsp),%r11
 movq 24+32(%rsp),%r12
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 leaq 128(%rsp),%rdi
 call __ecp_nistz256_mul_by_2x

 leaq 32(%rsp),%rbx
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_add_tox

 movq 96(%rsp),%rdx
 leaq 96(%rsp),%rbx
 movq 0+0(%rsp),%r9
 movq 8+0(%rsp),%r10
 leaq -128+0(%rsp),%rsi
 movq 16+0(%rsp),%r11
 movq 24+0(%rsp),%r12
 leaq 0(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 leaq 128(%rsp),%rdi
 call __ecp_nistz256_mul_by_2x

 movq 0+32(%rsp),%rdx
 movq 8+32(%rsp),%r14
 leaq -128+32(%rsp),%rsi
 movq 16+32(%rsp),%r15
 movq 24+32(%rsp),%r8
.byte 102,72,15,126,199
 call __ecp_nistz256_sqr_montx

 leaq 128(%rsp),%rbx
 movq %r14,%r8
 movq %r15,%r9
 movq %rsi,%r14
 movq %rbp,%r15
 call __ecp_nistz256_sub_fromx

 movq 0+0(%rsp),%rax
 movq 0+8(%rsp),%rbp
 movq 0+16(%rsp),%rcx
 movq 0+24(%rsp),%r10
 leaq 0(%rsp),%rdi
 call __ecp_nistz256_subx

 movq 32(%rsp),%rdx
 leaq 32(%rsp),%rbx
 movq %r12,%r14
 xorl %ecx,%ecx
 movq %r12,0+0(%rsp)
 movq %r13,%r10
 movq %r13,0+8(%rsp)
 cmovzq %r8,%r11
 movq %r8,0+16(%rsp)
 leaq 0-128(%rsp),%rsi
 cmovzq %r9,%r12
 movq %r9,0+24(%rsp)
 movq %r14,%r9
 leaq 0(%rsp),%rdi
 call __ecp_nistz256_mul_montx

.byte 102,72,15,126,203
.byte 102,72,15,126,207
 call __ecp_nistz256_sub_fromx

 leaq 160+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
 movq -48(%rsi),%r15
.cfi_restore %r15
 movq -40(%rsi),%r14
.cfi_restore %r14
 movq -32(%rsi),%r13
.cfi_restore %r13
 movq -24(%rsi),%r12
.cfi_restore %r12
 movq -16(%rsi),%rbx
.cfi_restore %rbx
 movq -8(%rsi),%rbp
.cfi_restore %rbp
 leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpoint_doublex_epilogue:
 .byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
.type ecp_nistz256_point_addx,@function
.align 32
ecp_nistz256_point_addx:
.cfi_startproc
.Lpoint_addx:
 pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
 pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
 pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
 pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
 pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
 pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
 subq $576+8,%rsp
.cfi_adjust_cfa_offset 32*18+8
.Lpoint_addx_body:

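/* ecp_nistz256_point_addx mirrors the point_add flow above, using mulx/adcx/adox;
   the x-suffix helpers expect %rsi biased by -128 */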
 movdqu 0(%rsi),%xmm0
 movdqu 16(%rsi),%xmm1
 movdqu 32(%rsi),%xmm2
 movdqu 48(%rsi),%xmm3
 movdqu 64(%rsi),%xmm4
 movdqu 80(%rsi),%xmm5
 movq %rsi,%rbx
 movq %rdx,%rsi
 movdqa %xmm0,384(%rsp)
 movdqa %xmm1,384+16(%rsp)
 movdqa %xmm2,416(%rsp)
 movdqa %xmm3,416+16(%rsp)
 movdqa %xmm4,448(%rsp)
 movdqa %xmm5,448+16(%rsp)
 por %xmm4,%xmm5

 movdqu 0(%rsi),%xmm0
 pshufd $0xb1,%xmm5,%xmm3
 movdqu 16(%rsi),%xmm1
 movdqu 32(%rsi),%xmm2
 por %xmm3,%xmm5
 movdqu 48(%rsi),%xmm3
 movq 64+0(%rsi),%rdx
 movq 64+8(%rsi),%r14
 movq 64+16(%rsi),%r15
 movq 64+24(%rsi),%r8
 movdqa %xmm0,480(%rsp)
 pshufd $0x1e,%xmm5,%xmm4
 movdqa %xmm1,480+16(%rsp)
 movdqu 64(%rsi),%xmm0
 movdqu 80(%rsi),%xmm1
 movdqa %xmm2,512(%rsp)
 movdqa %xmm3,512+16(%rsp)
 por %xmm4,%xmm5
 pxor %xmm4,%xmm4
 por %xmm0,%xmm1
.byte 102,72,15,110,199

 leaq 64-128(%rsi),%rsi
 movq %rdx,544+0(%rsp)
 movq %r14,544+8(%rsp)
 movq %r15,544+16(%rsp)
 movq %r8,544+24(%rsp)
 leaq 96(%rsp),%rdi
 call __ecp_nistz256_sqr_montx

 pcmpeqd %xmm4,%xmm5
 pshufd $0xb1,%xmm1,%xmm4
 por %xmm1,%xmm4
 pshufd $0,%xmm5,%xmm5
 pshufd $0x1e,%xmm4,%xmm3
 por %xmm3,%xmm4
 pxor %xmm3,%xmm3
 pcmpeqd %xmm3,%xmm4
 pshufd $0,%xmm4,%xmm4
 movq 64+0(%rbx),%rdx
 movq 64+8(%rbx),%r14
 movq 64+16(%rbx),%r15
 movq 64+24(%rbx),%r8
.byte 102,72,15,110,203

 leaq 64-128(%rbx),%rsi
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_sqr_montx

 movq 544(%rsp),%rdx
 leaq 544(%rsp),%rbx
 movq 0+96(%rsp),%r9
 movq 8+96(%rsp),%r10
 leaq -128+96(%rsp),%rsi
 movq 16+96(%rsp),%r11
 movq 24+96(%rsp),%r12
 leaq 224(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 448(%rsp),%rdx
 leaq 448(%rsp),%rbx
 movq 0+32(%rsp),%r9
 movq 8+32(%rsp),%r10
 leaq -128+32(%rsp),%rsi
 movq 16+32(%rsp),%r11
 movq 24+32(%rsp),%r12
 leaq 256(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 416(%rsp),%rdx
 leaq 416(%rsp),%rbx
 movq 0+224(%rsp),%r9
 movq 8+224(%rsp),%r10
 leaq -128+224(%rsp),%rsi
 movq 16+224(%rsp),%r11
 movq 24+224(%rsp),%r12
 leaq 224(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 512(%rsp),%rdx
 leaq 512(%rsp),%rbx
 movq 0+256(%rsp),%r9
 movq 8+256(%rsp),%r10
 leaq -128+256(%rsp),%rsi
 movq 16+256(%rsp),%r11
 movq 24+256(%rsp),%r12
 leaq 256(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 leaq 224(%rsp),%rbx
 leaq 64(%rsp),%rdi
 call __ecp_nistz256_sub_fromx

 orq %r13,%r12
 movdqa %xmm4,%xmm2
 orq %r8,%r12
 orq %r9,%r12
 por %xmm5,%xmm2
.byte 102,73,15,110,220

 movq 384(%rsp),%rdx
 leaq 384(%rsp),%rbx
 movq 0+96(%rsp),%r9
 movq 8+96(%rsp),%r10
 leaq -128+96(%rsp),%rsi
 movq 16+96(%rsp),%r11
 movq 24+96(%rsp),%r12
 leaq 160(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 480(%rsp),%rdx
 leaq 480(%rsp),%rbx
 movq 0+32(%rsp),%r9
 movq 8+32(%rsp),%r10
 leaq -128+32(%rsp),%rsi
 movq 16+32(%rsp),%r11
 movq 24+32(%rsp),%r12
 leaq 192(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 leaq 160(%rsp),%rbx
 leaq 0(%rsp),%rdi
 call __ecp_nistz256_sub_fromx

 orq %r13,%r12
 orq %r8,%r12
 orq %r9,%r12

.byte 102,73,15,126,208
.byte 102,73,15,126,217
 orq %r8,%r12
.byte 0x3e
 jnz .Ladd_proceedx



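/* as in the q path: equal x coordinates mean either equal points (double) or P == -Q (infinity) */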
 testq %r9,%r9
 jz .Ladd_doublex






.byte 102,72,15,126,199
 pxor %xmm0,%xmm0
 movdqu %xmm0,0(%rdi)
 movdqu %xmm0,16(%rdi)
 movdqu %xmm0,32(%rdi)
 movdqu %xmm0,48(%rdi)
 movdqu %xmm0,64(%rdi)
 movdqu %xmm0,80(%rdi)
 jmp .Ladd_donex

.align 32
.Ladd_doublex:
.byte 102,72,15,126,206
.byte 102,72,15,126,199
 addq $416,%rsp
.cfi_adjust_cfa_offset -416
 jmp .Lpoint_double_shortcutx
.cfi_adjust_cfa_offset 416

.align 32
.Ladd_proceedx:
 movq 0+64(%rsp),%rdx
 movq 8+64(%rsp),%r14
 leaq -128+64(%rsp),%rsi
 movq 16+64(%rsp),%r15
 movq 24+64(%rsp),%r8
 leaq 96(%rsp),%rdi
 call __ecp_nistz256_sqr_montx

 movq 448(%rsp),%rdx
 leaq 448(%rsp),%rbx
 movq 0+0(%rsp),%r9
 movq 8+0(%rsp),%r10
 leaq -128+0(%rsp),%rsi
 movq 16+0(%rsp),%r11
 movq 24+0(%rsp),%r12
 leaq 352(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 0+0(%rsp),%rdx
 movq 8+0(%rsp),%r14
 leaq -128+0(%rsp),%rsi
 movq 16+0(%rsp),%r15
 movq 24+0(%rsp),%r8
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_sqr_montx

 movq 544(%rsp),%rdx
 leaq 544(%rsp),%rbx
 movq 0+352(%rsp),%r9
 movq 8+352(%rsp),%r10
 leaq -128+352(%rsp),%rsi
 movq 16+352(%rsp),%r11
 movq 24+352(%rsp),%r12
 leaq 352(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 0(%rsp),%rdx
 leaq 0(%rsp),%rbx
 movq 0+32(%rsp),%r9
 movq 8+32(%rsp),%r10
 leaq -128+32(%rsp),%rsi
 movq 16+32(%rsp),%r11
 movq 24+32(%rsp),%r12
 leaq 128(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 160(%rsp),%rdx
 leaq 160(%rsp),%rbx
 movq 0+32(%rsp),%r9
 movq 8+32(%rsp),%r10
 leaq -128+32(%rsp),%rsi
 movq 16+32(%rsp),%r11
 movq 24+32(%rsp),%r12
 leaq 192(%rsp),%rdi
 call __ecp_nistz256_mul_montx




 xorq %r11,%r11
 addq %r12,%r12
 leaq 96(%rsp),%rsi
 adcq %r13,%r13
 movq %r12,%rax
 adcq %r8,%r8
 adcq %r9,%r9
 movq %r13,%rbp
 adcq $0,%r11

 subq $-1,%r12
 movq %r8,%rcx
 sbbq %r14,%r13
 sbbq $0,%r8
 movq %r9,%r10
 sbbq %r15,%r9
 sbbq $0,%r11

 cmovcq %rax,%r12
 movq 0(%rsi),%rax
 cmovcq %rbp,%r13
 movq 8(%rsi),%rbp
 cmovcq %rcx,%r8
 movq 16(%rsi),%rcx
 cmovcq %r10,%r9
 movq 24(%rsi),%r10

 call __ecp_nistz256_subx

 leaq 128(%rsp),%rbx
 leaq 288(%rsp),%rdi
 call __ecp_nistz256_sub_fromx

 movq 192+0(%rsp),%rax
 movq 192+8(%rsp),%rbp
 movq 192+16(%rsp),%rcx
 movq 192+24(%rsp),%r10
 leaq 320(%rsp),%rdi

 call __ecp_nistz256_subx

 movq %r12,0(%rdi)
 movq %r13,8(%rdi)
 movq %r8,16(%rdi)
 movq %r9,24(%rdi)
 movq 128(%rsp),%rdx
 leaq 128(%rsp),%rbx
 movq 0+224(%rsp),%r9
 movq 8+224(%rsp),%r10
 leaq -128+224(%rsp),%rsi
 movq 16+224(%rsp),%r11
 movq 24+224(%rsp),%r12
 leaq 256(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 320(%rsp),%rdx
 leaq 320(%rsp),%rbx
 movq 0+64(%rsp),%r9
 movq 8+64(%rsp),%r10
 leaq -128+64(%rsp),%rsi
 movq 16+64(%rsp),%r11
 movq 24+64(%rsp),%r12
 leaq 320(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 leaq 256(%rsp),%rbx
 leaq 320(%rsp),%rdi
 call __ecp_nistz256_sub_fromx

.byte 102,72,15,126,199

 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 352(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 352+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand 544(%rsp),%xmm2
 pand 544+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 448(%rsp),%xmm2
 pand 448+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,64(%rdi)
 movdqu %xmm3,80(%rdi)

 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 288(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 288+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand 480(%rsp),%xmm2
 pand 480+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 384(%rsp),%xmm2
 pand 384+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,0(%rdi)
 movdqu %xmm3,16(%rdi)

 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 320(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 320+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand 512(%rsp),%xmm2
 pand 512+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 416(%rsp),%xmm2
 pand 416+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,32(%rdi)
 movdqu %xmm3,48(%rdi)

.Ladd_donex:
 leaq 576+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
 movq -48(%rsi),%r15
.cfi_restore %r15
 movq -40(%rsi),%r14
.cfi_restore %r14
 movq -32(%rsi),%r13
.cfi_restore %r13
 movq -24(%rsi),%r12
.cfi_restore %r12
 movq -16(%rsi),%rbx
.cfi_restore %rbx
 movq -8(%rsi),%rbp
.cfi_restore %rbp
 leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpoint_addx_epilogue:
 .byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
.type ecp_nistz256_point_add_affinex,@function
.align 32
ecp_nistz256_point_add_affinex:
.cfi_startproc
.Lpoint_add_affinex:
 pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
 pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
 pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
 pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
 pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
 pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
 subq $480+8,%rsp
.cfi_adjust_cfa_offset 32*15+8
.Ladd_affinex_body:

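/* mirrors ecp_nistz256_point_add_affine above with the x-suffix helpers */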
 movdqu 0(%rsi),%xmm0
 movq %rdx,%rbx
 movdqu 16(%rsi),%xmm1
 movdqu 32(%rsi),%xmm2
 movdqu 48(%rsi),%xmm3
 movdqu 64(%rsi),%xmm4
 movdqu 80(%rsi),%xmm5
 movq 64+0(%rsi),%rdx
 movq 64+8(%rsi),%r14
 movq 64+16(%rsi),%r15
 movq 64+24(%rsi),%r8
 movdqa %xmm0,320(%rsp)
 movdqa %xmm1,320+16(%rsp)
 movdqa %xmm2,352(%rsp)
 movdqa %xmm3,352+16(%rsp)
 movdqa %xmm4,384(%rsp)
 movdqa %xmm5,384+16(%rsp)
 por %xmm4,%xmm5

 movdqu 0(%rbx),%xmm0
 pshufd $0xb1,%xmm5,%xmm3
 movdqu 16(%rbx),%xmm1
 movdqu 32(%rbx),%xmm2
 por %xmm3,%xmm5
 movdqu 48(%rbx),%xmm3
 movdqa %xmm0,416(%rsp)
 pshufd $0x1e,%xmm5,%xmm4
 movdqa %xmm1,416+16(%rsp)
 por %xmm0,%xmm1
.byte 102,72,15,110,199
 movdqa %xmm2,448(%rsp)
 movdqa %xmm3,448+16(%rsp)
 por %xmm2,%xmm3
 por %xmm4,%xmm5
 pxor %xmm4,%xmm4
 por %xmm1,%xmm3

 leaq 64-128(%rsi),%rsi
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_sqr_montx

 pcmpeqd %xmm4,%xmm5
 pshufd $0xb1,%xmm3,%xmm4
 movq 0(%rbx),%rdx

 movq %r12,%r9
 por %xmm3,%xmm4
 pshufd $0,%xmm5,%xmm5
 pshufd $0x1e,%xmm4,%xmm3
 movq %r13,%r10
 por %xmm3,%xmm4
 pxor %xmm3,%xmm3
 movq %r14,%r11
 pcmpeqd %xmm3,%xmm4
 pshufd $0,%xmm4,%xmm4

 leaq 32-128(%rsp),%rsi
 movq %r15,%r12
 leaq 0(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 leaq 320(%rsp),%rbx
 leaq 64(%rsp),%rdi
 call __ecp_nistz256_sub_fromx

 movq 384(%rsp),%rdx
 leaq 384(%rsp),%rbx
 movq 0+32(%rsp),%r9
 movq 8+32(%rsp),%r10
 leaq -128+32(%rsp),%rsi
 movq 16+32(%rsp),%r11
 movq 24+32(%rsp),%r12
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 384(%rsp),%rdx
 leaq 384(%rsp),%rbx
 movq 0+64(%rsp),%r9
 movq 8+64(%rsp),%r10
 leaq -128+64(%rsp),%rsi
 movq 16+64(%rsp),%r11
 movq 24+64(%rsp),%r12
 leaq 288(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 448(%rsp),%rdx
 leaq 448(%rsp),%rbx
 movq 0+32(%rsp),%r9
 movq 8+32(%rsp),%r10
 leaq -128+32(%rsp),%rsi
 movq 16+32(%rsp),%r11
 movq 24+32(%rsp),%r12
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 leaq 352(%rsp),%rbx
 leaq 96(%rsp),%rdi
 call __ecp_nistz256_sub_fromx

 movq 0+64(%rsp),%rdx
 movq 8+64(%rsp),%r14
 leaq -128+64(%rsp),%rsi
 movq 16+64(%rsp),%r15
 movq 24+64(%rsp),%r8
 leaq 128(%rsp),%rdi
 call __ecp_nistz256_sqr_montx

 movq 0+96(%rsp),%rdx
 movq 8+96(%rsp),%r14
 leaq -128+96(%rsp),%rsi
 movq 16+96(%rsp),%r15
 movq 24+96(%rsp),%r8
 leaq 192(%rsp),%rdi
 call __ecp_nistz256_sqr_montx

 movq 128(%rsp),%rdx
 leaq 128(%rsp),%rbx
 movq 0+64(%rsp),%r9
 movq 8+64(%rsp),%r10
 leaq -128+64(%rsp),%rsi
 movq 16+64(%rsp),%r11
 movq 24+64(%rsp),%r12
 leaq 160(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 320(%rsp),%rdx
 leaq 320(%rsp),%rbx
 movq 0+128(%rsp),%r9
 movq 8+128(%rsp),%r10
 leaq -128+128(%rsp),%rsi
 movq 16+128(%rsp),%r11
 movq 24+128(%rsp),%r12
 leaq 0(%rsp),%rdi
 call __ecp_nistz256_mul_montx




 xorq %r11,%r11
 addq %r12,%r12
 leaq 192(%rsp),%rsi
 adcq %r13,%r13
 movq %r12,%rax
 adcq %r8,%r8
 adcq %r9,%r9
 movq %r13,%rbp
 adcq $0,%r11

 subq $-1,%r12
 movq %r8,%rcx
 sbbq %r14,%r13
 sbbq $0,%r8
 movq %r9,%r10
 sbbq %r15,%r9
 sbbq $0,%r11

 cmovcq %rax,%r12
 movq 0(%rsi),%rax
 cmovcq %rbp,%r13
 movq 8(%rsi),%rbp
 cmovcq %rcx,%r8
 movq 16(%rsi),%rcx
 cmovcq %r10,%r9
 movq 24(%rsi),%r10

 call __ecp_nistz256_subx

 leaq 160(%rsp),%rbx
 leaq 224(%rsp),%rdi
 call __ecp_nistz256_sub_fromx

 movq 0+0(%rsp),%rax
 movq 0+8(%rsp),%rbp
 movq 0+16(%rsp),%rcx
 movq 0+24(%rsp),%r10
 leaq 64(%rsp),%rdi

 call __ecp_nistz256_subx

 movq %r12,0(%rdi)
 movq %r13,8(%rdi)
 movq %r8,16(%rdi)
 movq %r9,24(%rdi)
 movq 352(%rsp),%rdx
 leaq 352(%rsp),%rbx
 movq 0+160(%rsp),%r9
 movq 8+160(%rsp),%r10
 leaq -128+160(%rsp),%rsi
 movq 16+160(%rsp),%r11
 movq 24+160(%rsp),%r12
 leaq 32(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 movq 96(%rsp),%rdx
 leaq 96(%rsp),%rbx
 movq 0+64(%rsp),%r9
 movq 8+64(%rsp),%r10
 leaq -128+64(%rsp),%rsi
 movq 16+64(%rsp),%r11
 movq 24+64(%rsp),%r12
 leaq 64(%rsp),%rdi
 call __ecp_nistz256_mul_montx

 leaq 32(%rsp),%rbx
 leaq 256(%rsp),%rdi
 call __ecp_nistz256_sub_fromx

.byte 102,72,15,126,199

 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 288(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 288+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand .LONE_mont(%rip),%xmm2
 pand .LONE_mont+16(%rip),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 384(%rsp),%xmm2
 pand 384+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,64(%rdi)
 movdqu %xmm3,80(%rdi)

 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 224(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 224+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand 416(%rsp),%xmm2
 pand 416+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 320(%rsp),%xmm2
 pand 320+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,0(%rdi)
 movdqu %xmm3,16(%rdi)

 movdqa %xmm5,%xmm0
 movdqa %xmm5,%xmm1
 pandn 256(%rsp),%xmm0
 movdqa %xmm5,%xmm2
 pandn 256+16(%rsp),%xmm1
 movdqa %xmm5,%xmm3
 pand 448(%rsp),%xmm2
 pand 448+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3

 movdqa %xmm4,%xmm0
 movdqa %xmm4,%xmm1
 pandn %xmm2,%xmm0
 movdqa %xmm4,%xmm2
 pandn %xmm3,%xmm1
 movdqa %xmm4,%xmm3
 pand 352(%rsp),%xmm2
 pand 352+16(%rsp),%xmm3
 por %xmm0,%xmm2
 por %xmm1,%xmm3
 movdqu %xmm2,32(%rdi)
 movdqu %xmm3,48(%rdi)

 leaq 480+56(%rsp),%rsi
.cfi_def_cfa %rsi,8
 movq -48(%rsi),%r15
.cfi_restore %r15
 movq -40(%rsi),%r14
.cfi_restore %r14
 movq -32(%rsi),%r13
.cfi_restore %r13
 movq -24(%rsi),%r12
.cfi_restore %r12
 movq -16(%rsi),%rbx
.cfi_restore %rbx
 movq -8(%rsi),%rbp
.cfi_restore %rbp
 leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Ladd_affinex_epilogue:
 .byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
#endif