1 | # This file is generated from a similarly-named Perl script in the BoringSSL |
2 | # source tree. Do not edit by hand. |
3 | |
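# Constant-time P-256 (NIST secp256r1) primitives for x86_64: negation mod
# the field prime p, Montgomery multiplication/squaring mod p and mod the
# group order n, constant-time table lookups (SSE2 and AVX2), and Jacobian
# point doubling. Names ending in "x" denote BMI2/ADX (mulx/adcx/adox)
# variants chosen at run time from the feature bits in OPENSSL_ia32cap_P.
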
4 | #if defined(__has_feature) |
5 | #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) |
6 | #define OPENSSL_NO_ASM |
7 | #endif |
8 | #endif |
9 | |
10 | #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) |
11 | #if defined(BORINGSSL_PREFIX) |
12 | #include <boringssl_prefix_symbols_asm.h> |
13 | #endif |
14 | .text |
15 | .extern OPENSSL_ia32cap_P |
16 | .hidden OPENSSL_ia32cap_P |
17 | |
18 | |
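# The field prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1, as four
# little-endian 64-bit limbs.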
19 | .align 64 |
20 | .Lpoly: |
21 | .quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 |
22 | |
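# Broadcast dwords 1, 2 and 3, used as counters and strides by the
# constant-time select routines; .LONE_mont below is 1 in Montgomery form,
# i.e. 2^256 mod p.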
23 | .LOne: |
24 | .long 1,1,1,1,1,1,1,1 |
25 | .LTwo: |
26 | .long 2,2,2,2,2,2,2,2 |
27 | .LThree: |
28 | .long 3,3,3,3,3,3,3,3 |
29 | .LONE_mont: |
30 | .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe |
31 | |
32 | |
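# The group order n (little-endian limbs) and the Montgomery constant
# .LordK = -n^(-1) mod 2^64.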
33 | .Lord: |
34 | .quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 |
35 | .LordK: |
36 | .quad 0xccd1c8aaee00bc4f |
37 | |
38 | |
39 | |
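# ecp_nistz256_neg: res = -a mod p, in constant time.
# %rdi = res[4], %rsi = a[4].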
40 | .globl ecp_nistz256_neg |
41 | .hidden ecp_nistz256_neg |
42 | .type ecp_nistz256_neg,@function |
43 | .align 32 |
44 | ecp_nistz256_neg: |
45 | .cfi_startproc |
46 | pushq %r12 |
47 | .cfi_adjust_cfa_offset 8 |
48 | .cfi_offset %r12,-16 |
49 | pushq %r13 |
50 | .cfi_adjust_cfa_offset 8 |
51 | .cfi_offset %r13,-24 |
52 | .Lneg_body: |
53 | |
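# t = 0 - a; %r13 collects the borrow (nonzero iff a != 0).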
54 | xorq %r8,%r8 |
55 | xorq %r9,%r9 |
56 | xorq %r10,%r10 |
57 | xorq %r11,%r11 |
58 | xorq %r13,%r13 |
59 | |
60 | subq 0(%rsi),%r8 |
61 | sbbq 8(%rsi),%r9 |
62 | sbbq 16(%rsi),%r10 |
63 | movq %r8,%rax |
64 | sbbq 24(%rsi),%r11 |
65 | leaq .Lpoly(%rip),%rsi |
66 | movq %r9,%rdx |
67 | sbbq $0,%r13 |
68 | |
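# t += p. If a was zero (no borrow), the cmovz's below restore the saved
# zero limbs, so the result is 0 rather than p.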
69 | addq 0(%rsi),%r8 |
70 | movq %r10,%rcx |
71 | adcq 8(%rsi),%r9 |
72 | adcq 16(%rsi),%r10 |
73 | movq %r11,%r12 |
74 | adcq 24(%rsi),%r11 |
75 | testq %r13,%r13 |
76 | |
77 | cmovzq %rax,%r8 |
78 | cmovzq %rdx,%r9 |
79 | movq %r8,0(%rdi) |
80 | cmovzq %rcx,%r10 |
81 | movq %r9,8(%rdi) |
82 | cmovzq %r12,%r11 |
83 | movq %r10,16(%rdi) |
84 | movq %r11,24(%rdi) |
85 | |
86 | movq 0(%rsp),%r13 |
87 | .cfi_restore %r13 |
88 | movq 8(%rsp),%r12 |
89 | .cfi_restore %r12 |
90 | leaq 16(%rsp),%rsp |
91 | .cfi_adjust_cfa_offset -16 |
92 | .Lneg_epilogue: |
.byte 0xf3,0xc3 # repz ret (two-byte return encoding, used throughout)
94 | .cfi_endproc |
95 | .size ecp_nistz256_neg,.-ecp_nistz256_neg |
96 | |
97 | |
98 | |
99 | |
100 | |
101 | |
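# ecp_nistz256_ord_mul_mont: res = a*b*2^-256 mod n (Montgomery
# multiplication modulo the group order). %rdi = res, %rsi = a, %rdx = b.
# 0x80100 tests CPUID.7:EBX for BMI2 (bit 8) and ADX (bit 19); with both
# set, control transfers to the mulx/adcx/adox variant.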
102 | .globl ecp_nistz256_ord_mul_mont |
103 | .hidden ecp_nistz256_ord_mul_mont |
104 | .type ecp_nistz256_ord_mul_mont,@function |
105 | .align 32 |
106 | ecp_nistz256_ord_mul_mont: |
107 | .cfi_startproc |
108 | leaq OPENSSL_ia32cap_P(%rip),%rcx |
109 | movq 8(%rcx),%rcx |
110 | andl $0x80100,%ecx |
111 | cmpl $0x80100,%ecx |
112 | je .Lecp_nistz256_ord_mul_montx |
113 | pushq %rbp |
114 | .cfi_adjust_cfa_offset 8 |
115 | .cfi_offset %rbp,-16 |
116 | pushq %rbx |
117 | .cfi_adjust_cfa_offset 8 |
118 | .cfi_offset %rbx,-24 |
119 | pushq %r12 |
120 | .cfi_adjust_cfa_offset 8 |
121 | .cfi_offset %r12,-32 |
122 | pushq %r13 |
123 | .cfi_adjust_cfa_offset 8 |
124 | .cfi_offset %r13,-40 |
125 | pushq %r14 |
126 | .cfi_adjust_cfa_offset 8 |
127 | .cfi_offset %r14,-48 |
128 | pushq %r15 |
129 | .cfi_adjust_cfa_offset 8 |
130 | .cfi_offset %r15,-56 |
131 | .Lord_mul_body: |
132 | |
133 | movq 0(%rdx),%rax |
134 | movq %rdx,%rbx |
135 | leaq .Lord(%rip),%r14 |
136 | movq .LordK(%rip),%r15 |
137 | |
138 | |
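# Product scanning: one limb of b is multiplied in, then one Montgomery
# reduction step with factor u = acc0 * .LordK folds u*n into the
# accumulator, clearing the low limb. n's top limbs (all-ones and
# 0xffffffff00000000) are special-cased with a subtract and a 32-bit
# shift pair instead of full multiplies.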
139 | movq %rax,%rcx |
140 | mulq 0(%rsi) |
141 | movq %rax,%r8 |
142 | movq %rcx,%rax |
143 | movq %rdx,%r9 |
144 | |
145 | mulq 8(%rsi) |
146 | addq %rax,%r9 |
147 | movq %rcx,%rax |
148 | adcq $0,%rdx |
149 | movq %rdx,%r10 |
150 | |
151 | mulq 16(%rsi) |
152 | addq %rax,%r10 |
153 | movq %rcx,%rax |
154 | adcq $0,%rdx |
155 | |
156 | movq %r8,%r13 |
157 | imulq %r15,%r8 |
158 | |
159 | movq %rdx,%r11 |
160 | mulq 24(%rsi) |
161 | addq %rax,%r11 |
162 | movq %r8,%rax |
163 | adcq $0,%rdx |
164 | movq %rdx,%r12 |
165 | |
166 | |
167 | mulq 0(%r14) |
168 | movq %r8,%rbp |
169 | addq %rax,%r13 |
170 | movq %r8,%rax |
171 | adcq $0,%rdx |
172 | movq %rdx,%rcx |
173 | |
174 | subq %r8,%r10 |
175 | sbbq $0,%r8 |
176 | |
177 | mulq 8(%r14) |
178 | addq %rcx,%r9 |
179 | adcq $0,%rdx |
180 | addq %rax,%r9 |
181 | movq %rbp,%rax |
182 | adcq %rdx,%r10 |
183 | movq %rbp,%rdx |
184 | adcq $0,%r8 |
185 | |
186 | shlq $32,%rax |
187 | shrq $32,%rdx |
188 | subq %rax,%r11 |
189 | movq 8(%rbx),%rax |
190 | sbbq %rdx,%rbp |
191 | |
192 | addq %r8,%r11 |
193 | adcq %rbp,%r12 |
194 | adcq $0,%r13 |
195 | |
196 | |
197 | movq %rax,%rcx |
198 | mulq 0(%rsi) |
199 | addq %rax,%r9 |
200 | movq %rcx,%rax |
201 | adcq $0,%rdx |
202 | movq %rdx,%rbp |
203 | |
204 | mulq 8(%rsi) |
205 | addq %rbp,%r10 |
206 | adcq $0,%rdx |
207 | addq %rax,%r10 |
208 | movq %rcx,%rax |
209 | adcq $0,%rdx |
210 | movq %rdx,%rbp |
211 | |
212 | mulq 16(%rsi) |
213 | addq %rbp,%r11 |
214 | adcq $0,%rdx |
215 | addq %rax,%r11 |
216 | movq %rcx,%rax |
217 | adcq $0,%rdx |
218 | |
219 | movq %r9,%rcx |
220 | imulq %r15,%r9 |
221 | |
222 | movq %rdx,%rbp |
223 | mulq 24(%rsi) |
224 | addq %rbp,%r12 |
225 | adcq $0,%rdx |
226 | xorq %r8,%r8 |
227 | addq %rax,%r12 |
228 | movq %r9,%rax |
229 | adcq %rdx,%r13 |
230 | adcq $0,%r8 |
231 | |
232 | |
233 | mulq 0(%r14) |
234 | movq %r9,%rbp |
235 | addq %rax,%rcx |
236 | movq %r9,%rax |
237 | adcq %rdx,%rcx |
238 | |
239 | subq %r9,%r11 |
240 | sbbq $0,%r9 |
241 | |
242 | mulq 8(%r14) |
243 | addq %rcx,%r10 |
244 | adcq $0,%rdx |
245 | addq %rax,%r10 |
246 | movq %rbp,%rax |
247 | adcq %rdx,%r11 |
248 | movq %rbp,%rdx |
249 | adcq $0,%r9 |
250 | |
251 | shlq $32,%rax |
252 | shrq $32,%rdx |
253 | subq %rax,%r12 |
254 | movq 16(%rbx),%rax |
255 | sbbq %rdx,%rbp |
256 | |
257 | addq %r9,%r12 |
258 | adcq %rbp,%r13 |
259 | adcq $0,%r8 |
260 | |
261 | |
262 | movq %rax,%rcx |
263 | mulq 0(%rsi) |
264 | addq %rax,%r10 |
265 | movq %rcx,%rax |
266 | adcq $0,%rdx |
267 | movq %rdx,%rbp |
268 | |
269 | mulq 8(%rsi) |
270 | addq %rbp,%r11 |
271 | adcq $0,%rdx |
272 | addq %rax,%r11 |
273 | movq %rcx,%rax |
274 | adcq $0,%rdx |
275 | movq %rdx,%rbp |
276 | |
277 | mulq 16(%rsi) |
278 | addq %rbp,%r12 |
279 | adcq $0,%rdx |
280 | addq %rax,%r12 |
281 | movq %rcx,%rax |
282 | adcq $0,%rdx |
283 | |
284 | movq %r10,%rcx |
285 | imulq %r15,%r10 |
286 | |
287 | movq %rdx,%rbp |
288 | mulq 24(%rsi) |
289 | addq %rbp,%r13 |
290 | adcq $0,%rdx |
291 | xorq %r9,%r9 |
292 | addq %rax,%r13 |
293 | movq %r10,%rax |
294 | adcq %rdx,%r8 |
295 | adcq $0,%r9 |
296 | |
297 | |
298 | mulq 0(%r14) |
299 | movq %r10,%rbp |
300 | addq %rax,%rcx |
301 | movq %r10,%rax |
302 | adcq %rdx,%rcx |
303 | |
304 | subq %r10,%r12 |
305 | sbbq $0,%r10 |
306 | |
307 | mulq 8(%r14) |
308 | addq %rcx,%r11 |
309 | adcq $0,%rdx |
310 | addq %rax,%r11 |
311 | movq %rbp,%rax |
312 | adcq %rdx,%r12 |
313 | movq %rbp,%rdx |
314 | adcq $0,%r10 |
315 | |
316 | shlq $32,%rax |
317 | shrq $32,%rdx |
318 | subq %rax,%r13 |
319 | movq 24(%rbx),%rax |
320 | sbbq %rdx,%rbp |
321 | |
322 | addq %r10,%r13 |
323 | adcq %rbp,%r8 |
324 | adcq $0,%r9 |
325 | |
326 | |
327 | movq %rax,%rcx |
328 | mulq 0(%rsi) |
329 | addq %rax,%r11 |
330 | movq %rcx,%rax |
331 | adcq $0,%rdx |
332 | movq %rdx,%rbp |
333 | |
334 | mulq 8(%rsi) |
335 | addq %rbp,%r12 |
336 | adcq $0,%rdx |
337 | addq %rax,%r12 |
338 | movq %rcx,%rax |
339 | adcq $0,%rdx |
340 | movq %rdx,%rbp |
341 | |
342 | mulq 16(%rsi) |
343 | addq %rbp,%r13 |
344 | adcq $0,%rdx |
345 | addq %rax,%r13 |
346 | movq %rcx,%rax |
347 | adcq $0,%rdx |
348 | |
349 | movq %r11,%rcx |
350 | imulq %r15,%r11 |
351 | |
352 | movq %rdx,%rbp |
353 | mulq 24(%rsi) |
354 | addq %rbp,%r8 |
355 | adcq $0,%rdx |
356 | xorq %r10,%r10 |
357 | addq %rax,%r8 |
358 | movq %r11,%rax |
359 | adcq %rdx,%r9 |
360 | adcq $0,%r10 |
361 | |
362 | |
363 | mulq 0(%r14) |
364 | movq %r11,%rbp |
365 | addq %rax,%rcx |
366 | movq %r11,%rax |
367 | adcq %rdx,%rcx |
368 | |
369 | subq %r11,%r13 |
370 | sbbq $0,%r11 |
371 | |
372 | mulq 8(%r14) |
373 | addq %rcx,%r12 |
374 | adcq $0,%rdx |
375 | addq %rax,%r12 |
376 | movq %rbp,%rax |
377 | adcq %rdx,%r13 |
378 | movq %rbp,%rdx |
379 | adcq $0,%r11 |
380 | |
381 | shlq $32,%rax |
382 | shrq $32,%rdx |
383 | subq %rax,%r8 |
384 | sbbq %rdx,%rbp |
385 | |
386 | addq %r11,%r8 |
387 | adcq %rbp,%r9 |
388 | adcq $0,%r10 |
389 | |
390 | |
391 | movq %r12,%rsi |
392 | subq 0(%r14),%r12 |
393 | movq %r13,%r11 |
394 | sbbq 8(%r14),%r13 |
395 | movq %r8,%rcx |
396 | sbbq 16(%r14),%r8 |
397 | movq %r9,%rbp |
398 | sbbq 24(%r14),%r9 |
399 | sbbq $0,%r10 |
400 | |
401 | cmovcq %rsi,%r12 |
402 | cmovcq %r11,%r13 |
403 | cmovcq %rcx,%r8 |
404 | cmovcq %rbp,%r9 |
405 | |
406 | movq %r12,0(%rdi) |
407 | movq %r13,8(%rdi) |
408 | movq %r8,16(%rdi) |
409 | movq %r9,24(%rdi) |
410 | |
411 | movq 0(%rsp),%r15 |
412 | .cfi_restore %r15 |
413 | movq 8(%rsp),%r14 |
414 | .cfi_restore %r14 |
415 | movq 16(%rsp),%r13 |
416 | .cfi_restore %r13 |
417 | movq 24(%rsp),%r12 |
418 | .cfi_restore %r12 |
419 | movq 32(%rsp),%rbx |
420 | .cfi_restore %rbx |
421 | movq 40(%rsp),%rbp |
422 | .cfi_restore %rbp |
423 | leaq 48(%rsp),%rsp |
424 | .cfi_adjust_cfa_offset -48 |
425 | .Lord_mul_epilogue: |
426 | .byte 0xf3,0xc3 |
427 | .cfi_endproc |
428 | .size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont |
429 | |
430 | |
431 | |
432 | |
433 | |
434 | |
435 | |
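# ecp_nistz256_ord_sqr_mont: squares a modulo n rep times, staying in
# Montgomery form. %rdi = res, %rsi = a, %rdx = rep (>= 1). Same BMI2/ADX
# dispatch as ord_mul_mont.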
436 | .globl ecp_nistz256_ord_sqr_mont |
437 | .hidden ecp_nistz256_ord_sqr_mont |
438 | .type ecp_nistz256_ord_sqr_mont,@function |
439 | .align 32 |
440 | ecp_nistz256_ord_sqr_mont: |
441 | .cfi_startproc |
442 | leaq OPENSSL_ia32cap_P(%rip),%rcx |
443 | movq 8(%rcx),%rcx |
444 | andl $0x80100,%ecx |
445 | cmpl $0x80100,%ecx |
446 | je .Lecp_nistz256_ord_sqr_montx |
447 | pushq %rbp |
448 | .cfi_adjust_cfa_offset 8 |
449 | .cfi_offset %rbp,-16 |
450 | pushq %rbx |
451 | .cfi_adjust_cfa_offset 8 |
452 | .cfi_offset %rbx,-24 |
453 | pushq %r12 |
454 | .cfi_adjust_cfa_offset 8 |
455 | .cfi_offset %r12,-32 |
456 | pushq %r13 |
457 | .cfi_adjust_cfa_offset 8 |
458 | .cfi_offset %r13,-40 |
459 | pushq %r14 |
460 | .cfi_adjust_cfa_offset 8 |
461 | .cfi_offset %r14,-48 |
462 | pushq %r15 |
463 | .cfi_adjust_cfa_offset 8 |
464 | .cfi_offset %r15,-56 |
465 | .Lord_sqr_body: |
466 | |
467 | movq 0(%rsi),%r8 |
468 | movq 8(%rsi),%rax |
469 | movq 16(%rsi),%r14 |
470 | movq 24(%rsi),%r15 |
471 | leaq .Lord(%rip),%rsi |
472 | movq %rdx,%rbx |
473 | jmp .Loop_ord_sqr |
474 | |
475 | .align 32 |
476 | .Loop_ord_sqr: |
477 | |
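# Off-diagonal products a[i]*a[j], i < j. Copies of a[1..3] are parked in
# xmm1-xmm3 and reloaded later for the diagonal squarings.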
478 | movq %rax,%rbp |
479 | mulq %r8 |
480 | movq %rax,%r9 |
.byte 102,72,15,110,205 # movq %rbp,%xmm1
482 | movq %r14,%rax |
483 | movq %rdx,%r10 |
484 | |
485 | mulq %r8 |
486 | addq %rax,%r10 |
487 | movq %r15,%rax |
.byte 102,73,15,110,214 # movq %r14,%xmm2
489 | adcq $0,%rdx |
490 | movq %rdx,%r11 |
491 | |
492 | mulq %r8 |
493 | addq %rax,%r11 |
494 | movq %r15,%rax |
.byte 102,73,15,110,223 # movq %r15,%xmm3
496 | adcq $0,%rdx |
497 | movq %rdx,%r12 |
498 | |
499 | |
500 | mulq %r14 |
501 | movq %rax,%r13 |
502 | movq %r14,%rax |
503 | movq %rdx,%r14 |
504 | |
505 | |
506 | mulq %rbp |
507 | addq %rax,%r11 |
508 | movq %r15,%rax |
509 | adcq $0,%rdx |
510 | movq %rdx,%r15 |
511 | |
512 | mulq %rbp |
513 | addq %rax,%r12 |
514 | adcq $0,%rdx |
515 | |
516 | addq %r15,%r12 |
517 | adcq %rdx,%r13 |
518 | adcq $0,%r14 |
519 | |
520 | |
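# Double the cross products, then add the diagonal squares a[i]^2.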
521 | xorq %r15,%r15 |
522 | movq %r8,%rax |
523 | addq %r9,%r9 |
524 | adcq %r10,%r10 |
525 | adcq %r11,%r11 |
526 | adcq %r12,%r12 |
527 | adcq %r13,%r13 |
528 | adcq %r14,%r14 |
529 | adcq $0,%r15 |
530 | |
531 | |
532 | mulq %rax |
533 | movq %rax,%r8 |
.byte 102,72,15,126,200 # movq %xmm1,%rax
535 | movq %rdx,%rbp |
536 | |
537 | mulq %rax |
538 | addq %rbp,%r9 |
539 | adcq %rax,%r10 |
.byte 102,72,15,126,208 # movq %xmm2,%rax
541 | adcq $0,%rdx |
542 | movq %rdx,%rbp |
543 | |
544 | mulq %rax |
545 | addq %rbp,%r11 |
546 | adcq %rax,%r12 |
.byte 102,72,15,126,216 # movq %xmm3,%rax
548 | adcq $0,%rdx |
549 | movq %rdx,%rbp |
550 | |
551 | movq %r8,%rcx |
552 | imulq 32(%rsi),%r8 |
553 | |
554 | mulq %rax |
555 | addq %rbp,%r13 |
556 | adcq %rax,%r14 |
557 | movq 0(%rsi),%rax |
558 | adcq %rdx,%r15 |
559 | |
560 | |
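# Four Montgomery reduction steps modulo n, one per low limb, with factor
# u = acc_low * .LordK (.LordK sits at .Lord+32, hence 32(%rsi)).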
561 | mulq %r8 |
562 | movq %r8,%rbp |
563 | addq %rax,%rcx |
564 | movq 8(%rsi),%rax |
565 | adcq %rdx,%rcx |
566 | |
567 | subq %r8,%r10 |
568 | sbbq $0,%rbp |
569 | |
570 | mulq %r8 |
571 | addq %rcx,%r9 |
572 | adcq $0,%rdx |
573 | addq %rax,%r9 |
574 | movq %r8,%rax |
575 | adcq %rdx,%r10 |
576 | movq %r8,%rdx |
577 | adcq $0,%rbp |
578 | |
579 | movq %r9,%rcx |
580 | imulq 32(%rsi),%r9 |
581 | |
582 | shlq $32,%rax |
583 | shrq $32,%rdx |
584 | subq %rax,%r11 |
585 | movq 0(%rsi),%rax |
586 | sbbq %rdx,%r8 |
587 | |
588 | addq %rbp,%r11 |
589 | adcq $0,%r8 |
590 | |
591 | |
592 | mulq %r9 |
593 | movq %r9,%rbp |
594 | addq %rax,%rcx |
595 | movq 8(%rsi),%rax |
596 | adcq %rdx,%rcx |
597 | |
598 | subq %r9,%r11 |
599 | sbbq $0,%rbp |
600 | |
601 | mulq %r9 |
602 | addq %rcx,%r10 |
603 | adcq $0,%rdx |
604 | addq %rax,%r10 |
605 | movq %r9,%rax |
606 | adcq %rdx,%r11 |
607 | movq %r9,%rdx |
608 | adcq $0,%rbp |
609 | |
610 | movq %r10,%rcx |
611 | imulq 32(%rsi),%r10 |
612 | |
613 | shlq $32,%rax |
614 | shrq $32,%rdx |
615 | subq %rax,%r8 |
616 | movq 0(%rsi),%rax |
617 | sbbq %rdx,%r9 |
618 | |
619 | addq %rbp,%r8 |
620 | adcq $0,%r9 |
621 | |
622 | |
623 | mulq %r10 |
624 | movq %r10,%rbp |
625 | addq %rax,%rcx |
626 | movq 8(%rsi),%rax |
627 | adcq %rdx,%rcx |
628 | |
629 | subq %r10,%r8 |
630 | sbbq $0,%rbp |
631 | |
632 | mulq %r10 |
633 | addq %rcx,%r11 |
634 | adcq $0,%rdx |
635 | addq %rax,%r11 |
636 | movq %r10,%rax |
637 | adcq %rdx,%r8 |
638 | movq %r10,%rdx |
639 | adcq $0,%rbp |
640 | |
641 | movq %r11,%rcx |
642 | imulq 32(%rsi),%r11 |
643 | |
644 | shlq $32,%rax |
645 | shrq $32,%rdx |
646 | subq %rax,%r9 |
647 | movq 0(%rsi),%rax |
648 | sbbq %rdx,%r10 |
649 | |
650 | addq %rbp,%r9 |
651 | adcq $0,%r10 |
652 | |
653 | |
654 | mulq %r11 |
655 | movq %r11,%rbp |
656 | addq %rax,%rcx |
657 | movq 8(%rsi),%rax |
658 | adcq %rdx,%rcx |
659 | |
660 | subq %r11,%r9 |
661 | sbbq $0,%rbp |
662 | |
663 | mulq %r11 |
664 | addq %rcx,%r8 |
665 | adcq $0,%rdx |
666 | addq %rax,%r8 |
667 | movq %r11,%rax |
668 | adcq %rdx,%r9 |
669 | movq %r11,%rdx |
670 | adcq $0,%rbp |
671 | |
672 | shlq $32,%rax |
673 | shrq $32,%rdx |
674 | subq %rax,%r10 |
675 | sbbq %rdx,%r11 |
676 | |
677 | addq %rbp,%r10 |
678 | adcq $0,%r11 |
679 | |
680 | |
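# Add back the high half of the square, then a final conditional
# subtraction of n.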
681 | xorq %rdx,%rdx |
682 | addq %r12,%r8 |
683 | adcq %r13,%r9 |
684 | movq %r8,%r12 |
685 | adcq %r14,%r10 |
686 | adcq %r15,%r11 |
687 | movq %r9,%rax |
688 | adcq $0,%rdx |
689 | |
690 | |
691 | subq 0(%rsi),%r8 |
692 | movq %r10,%r14 |
693 | sbbq 8(%rsi),%r9 |
694 | sbbq 16(%rsi),%r10 |
695 | movq %r11,%r15 |
696 | sbbq 24(%rsi),%r11 |
697 | sbbq $0,%rdx |
698 | |
699 | cmovcq %r12,%r8 |
700 | cmovncq %r9,%rax |
701 | cmovncq %r10,%r14 |
702 | cmovncq %r11,%r15 |
703 | |
704 | decq %rbx |
705 | jnz .Loop_ord_sqr |
706 | |
707 | movq %r8,0(%rdi) |
708 | movq %rax,8(%rdi) |
709 | pxor %xmm1,%xmm1 |
710 | movq %r14,16(%rdi) |
711 | pxor %xmm2,%xmm2 |
712 | movq %r15,24(%rdi) |
713 | pxor %xmm3,%xmm3 |
714 | |
715 | movq 0(%rsp),%r15 |
716 | .cfi_restore %r15 |
717 | movq 8(%rsp),%r14 |
718 | .cfi_restore %r14 |
719 | movq 16(%rsp),%r13 |
720 | .cfi_restore %r13 |
721 | movq 24(%rsp),%r12 |
722 | .cfi_restore %r12 |
723 | movq 32(%rsp),%rbx |
724 | .cfi_restore %rbx |
725 | movq 40(%rsp),%rbp |
726 | .cfi_restore %rbp |
727 | leaq 48(%rsp),%rsp |
728 | .cfi_adjust_cfa_offset -48 |
729 | .Lord_sqr_epilogue: |
730 | .byte 0xf3,0xc3 |
731 | .cfi_endproc |
732 | .size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont |
733 | |
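# BMI2/ADX variant of ecp_nistz256_ord_mul_mont: mulx keeps one factor in
# %rdx while adcx/adox drive two independent carry chains. %rsi and %r14
# are biased by -128 and operands addressed as disp+128, an addressing
# idiom carried over from the perlasm source.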
734 | .type ecp_nistz256_ord_mul_montx,@function |
735 | .align 32 |
736 | ecp_nistz256_ord_mul_montx: |
737 | .cfi_startproc |
738 | .Lecp_nistz256_ord_mul_montx: |
739 | pushq %rbp |
740 | .cfi_adjust_cfa_offset 8 |
741 | .cfi_offset %rbp,-16 |
742 | pushq %rbx |
743 | .cfi_adjust_cfa_offset 8 |
744 | .cfi_offset %rbx,-24 |
745 | pushq %r12 |
746 | .cfi_adjust_cfa_offset 8 |
747 | .cfi_offset %r12,-32 |
748 | pushq %r13 |
749 | .cfi_adjust_cfa_offset 8 |
750 | .cfi_offset %r13,-40 |
751 | pushq %r14 |
752 | .cfi_adjust_cfa_offset 8 |
753 | .cfi_offset %r14,-48 |
754 | pushq %r15 |
755 | .cfi_adjust_cfa_offset 8 |
756 | .cfi_offset %r15,-56 |
757 | .Lord_mulx_body: |
758 | |
759 | movq %rdx,%rbx |
760 | movq 0(%rdx),%rdx |
761 | movq 0(%rsi),%r9 |
762 | movq 8(%rsi),%r10 |
763 | movq 16(%rsi),%r11 |
764 | movq 24(%rsi),%r12 |
765 | leaq -128(%rsi),%rsi |
766 | leaq .Lord-128(%rip),%r14 |
767 | movq .LordK(%rip),%r15 |
768 | |
769 | |
770 | mulxq %r9,%r8,%r9 |
771 | mulxq %r10,%rcx,%r10 |
772 | mulxq %r11,%rbp,%r11 |
773 | addq %rcx,%r9 |
774 | mulxq %r12,%rcx,%r12 |
775 | movq %r8,%rdx |
776 | mulxq %r15,%rdx,%rax |
777 | adcq %rbp,%r10 |
778 | adcq %rcx,%r11 |
779 | adcq $0,%r12 |
780 | |
781 | |
782 | xorq %r13,%r13 |
783 | mulxq 0+128(%r14),%rcx,%rbp |
784 | adcxq %rcx,%r8 |
785 | adoxq %rbp,%r9 |
786 | |
787 | mulxq 8+128(%r14),%rcx,%rbp |
788 | adcxq %rcx,%r9 |
789 | adoxq %rbp,%r10 |
790 | |
791 | mulxq 16+128(%r14),%rcx,%rbp |
792 | adcxq %rcx,%r10 |
793 | adoxq %rbp,%r11 |
794 | |
795 | mulxq 24+128(%r14),%rcx,%rbp |
796 | movq 8(%rbx),%rdx |
797 | adcxq %rcx,%r11 |
798 | adoxq %rbp,%r12 |
799 | adcxq %r8,%r12 |
800 | adoxq %r8,%r13 |
801 | adcq $0,%r13 |
802 | |
803 | |
804 | mulxq 0+128(%rsi),%rcx,%rbp |
805 | adcxq %rcx,%r9 |
806 | adoxq %rbp,%r10 |
807 | |
808 | mulxq 8+128(%rsi),%rcx,%rbp |
809 | adcxq %rcx,%r10 |
810 | adoxq %rbp,%r11 |
811 | |
812 | mulxq 16+128(%rsi),%rcx,%rbp |
813 | adcxq %rcx,%r11 |
814 | adoxq %rbp,%r12 |
815 | |
816 | mulxq 24+128(%rsi),%rcx,%rbp |
817 | movq %r9,%rdx |
818 | mulxq %r15,%rdx,%rax |
819 | adcxq %rcx,%r12 |
820 | adoxq %rbp,%r13 |
821 | |
822 | adcxq %r8,%r13 |
823 | adoxq %r8,%r8 |
824 | adcq $0,%r8 |
825 | |
826 | |
827 | mulxq 0+128(%r14),%rcx,%rbp |
828 | adcxq %rcx,%r9 |
829 | adoxq %rbp,%r10 |
830 | |
831 | mulxq 8+128(%r14),%rcx,%rbp |
832 | adcxq %rcx,%r10 |
833 | adoxq %rbp,%r11 |
834 | |
835 | mulxq 16+128(%r14),%rcx,%rbp |
836 | adcxq %rcx,%r11 |
837 | adoxq %rbp,%r12 |
838 | |
839 | mulxq 24+128(%r14),%rcx,%rbp |
840 | movq 16(%rbx),%rdx |
841 | adcxq %rcx,%r12 |
842 | adoxq %rbp,%r13 |
843 | adcxq %r9,%r13 |
844 | adoxq %r9,%r8 |
845 | adcq $0,%r8 |
846 | |
847 | |
848 | mulxq 0+128(%rsi),%rcx,%rbp |
849 | adcxq %rcx,%r10 |
850 | adoxq %rbp,%r11 |
851 | |
852 | mulxq 8+128(%rsi),%rcx,%rbp |
853 | adcxq %rcx,%r11 |
854 | adoxq %rbp,%r12 |
855 | |
856 | mulxq 16+128(%rsi),%rcx,%rbp |
857 | adcxq %rcx,%r12 |
858 | adoxq %rbp,%r13 |
859 | |
860 | mulxq 24+128(%rsi),%rcx,%rbp |
861 | movq %r10,%rdx |
862 | mulxq %r15,%rdx,%rax |
863 | adcxq %rcx,%r13 |
864 | adoxq %rbp,%r8 |
865 | |
866 | adcxq %r9,%r8 |
867 | adoxq %r9,%r9 |
868 | adcq $0,%r9 |
869 | |
870 | |
871 | mulxq 0+128(%r14),%rcx,%rbp |
872 | adcxq %rcx,%r10 |
873 | adoxq %rbp,%r11 |
874 | |
875 | mulxq 8+128(%r14),%rcx,%rbp |
876 | adcxq %rcx,%r11 |
877 | adoxq %rbp,%r12 |
878 | |
879 | mulxq 16+128(%r14),%rcx,%rbp |
880 | adcxq %rcx,%r12 |
881 | adoxq %rbp,%r13 |
882 | |
883 | mulxq 24+128(%r14),%rcx,%rbp |
884 | movq 24(%rbx),%rdx |
885 | adcxq %rcx,%r13 |
886 | adoxq %rbp,%r8 |
887 | adcxq %r10,%r8 |
888 | adoxq %r10,%r9 |
889 | adcq $0,%r9 |
890 | |
891 | |
892 | mulxq 0+128(%rsi),%rcx,%rbp |
893 | adcxq %rcx,%r11 |
894 | adoxq %rbp,%r12 |
895 | |
896 | mulxq 8+128(%rsi),%rcx,%rbp |
897 | adcxq %rcx,%r12 |
898 | adoxq %rbp,%r13 |
899 | |
900 | mulxq 16+128(%rsi),%rcx,%rbp |
901 | adcxq %rcx,%r13 |
902 | adoxq %rbp,%r8 |
903 | |
904 | mulxq 24+128(%rsi),%rcx,%rbp |
905 | movq %r11,%rdx |
906 | mulxq %r15,%rdx,%rax |
907 | adcxq %rcx,%r8 |
908 | adoxq %rbp,%r9 |
909 | |
910 | adcxq %r10,%r9 |
911 | adoxq %r10,%r10 |
912 | adcq $0,%r10 |
913 | |
914 | |
915 | mulxq 0+128(%r14),%rcx,%rbp |
916 | adcxq %rcx,%r11 |
917 | adoxq %rbp,%r12 |
918 | |
919 | mulxq 8+128(%r14),%rcx,%rbp |
920 | adcxq %rcx,%r12 |
921 | adoxq %rbp,%r13 |
922 | |
923 | mulxq 16+128(%r14),%rcx,%rbp |
924 | adcxq %rcx,%r13 |
925 | adoxq %rbp,%r8 |
926 | |
927 | mulxq 24+128(%r14),%rcx,%rbp |
928 | leaq 128(%r14),%r14 |
929 | movq %r12,%rbx |
930 | adcxq %rcx,%r8 |
931 | adoxq %rbp,%r9 |
932 | movq %r13,%rdx |
933 | adcxq %r11,%r9 |
934 | adoxq %r11,%r10 |
935 | adcq $0,%r10 |
936 | |
937 | |
938 | |
939 | movq %r8,%rcx |
940 | subq 0(%r14),%r12 |
941 | sbbq 8(%r14),%r13 |
942 | sbbq 16(%r14),%r8 |
943 | movq %r9,%rbp |
944 | sbbq 24(%r14),%r9 |
945 | sbbq $0,%r10 |
946 | |
947 | cmovcq %rbx,%r12 |
948 | cmovcq %rdx,%r13 |
949 | cmovcq %rcx,%r8 |
950 | cmovcq %rbp,%r9 |
951 | |
952 | movq %r12,0(%rdi) |
953 | movq %r13,8(%rdi) |
954 | movq %r8,16(%rdi) |
955 | movq %r9,24(%rdi) |
956 | |
957 | movq 0(%rsp),%r15 |
958 | .cfi_restore %r15 |
959 | movq 8(%rsp),%r14 |
960 | .cfi_restore %r14 |
961 | movq 16(%rsp),%r13 |
962 | .cfi_restore %r13 |
963 | movq 24(%rsp),%r12 |
964 | .cfi_restore %r12 |
965 | movq 32(%rsp),%rbx |
966 | .cfi_restore %rbx |
967 | movq 40(%rsp),%rbp |
968 | .cfi_restore %rbp |
969 | leaq 48(%rsp),%rsp |
970 | .cfi_adjust_cfa_offset -48 |
971 | .Lord_mulx_epilogue: |
972 | .byte 0xf3,0xc3 |
973 | .cfi_endproc |
974 | .size ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx |
975 | |
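# BMI2/ADX variant of ecp_nistz256_ord_sqr_mont; same arguments
# (%rdx = rep, moved to %rbx).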
976 | .type ecp_nistz256_ord_sqr_montx,@function |
977 | .align 32 |
978 | ecp_nistz256_ord_sqr_montx: |
979 | .cfi_startproc |
980 | .Lecp_nistz256_ord_sqr_montx: |
981 | pushq %rbp |
982 | .cfi_adjust_cfa_offset 8 |
983 | .cfi_offset %rbp,-16 |
984 | pushq %rbx |
985 | .cfi_adjust_cfa_offset 8 |
986 | .cfi_offset %rbx,-24 |
987 | pushq %r12 |
988 | .cfi_adjust_cfa_offset 8 |
989 | .cfi_offset %r12,-32 |
990 | pushq %r13 |
991 | .cfi_adjust_cfa_offset 8 |
992 | .cfi_offset %r13,-40 |
993 | pushq %r14 |
994 | .cfi_adjust_cfa_offset 8 |
995 | .cfi_offset %r14,-48 |
996 | pushq %r15 |
997 | .cfi_adjust_cfa_offset 8 |
998 | .cfi_offset %r15,-56 |
999 | .Lord_sqrx_body: |
1000 | |
1001 | movq %rdx,%rbx |
1002 | movq 0(%rsi),%rdx |
1003 | movq 8(%rsi),%r14 |
1004 | movq 16(%rsi),%r15 |
1005 | movq 24(%rsi),%r8 |
1006 | leaq .Lord(%rip),%rsi |
1007 | jmp .Loop_ord_sqrx |
1008 | |
1009 | .align 32 |
1010 | .Loop_ord_sqrx: |
1011 | mulxq %r14,%r9,%r10 |
1012 | mulxq %r15,%rcx,%r11 |
1013 | movq %rdx,%rax |
.byte 102,73,15,110,206 # movq %r14,%xmm1
1015 | mulxq %r8,%rbp,%r12 |
1016 | movq %r14,%rdx |
1017 | addq %rcx,%r10 |
.byte 102,73,15,110,215 # movq %r15,%xmm2
1019 | adcq %rbp,%r11 |
1020 | adcq $0,%r12 |
1021 | xorq %r13,%r13 |
1022 | |
1023 | mulxq %r15,%rcx,%rbp |
1024 | adcxq %rcx,%r11 |
1025 | adoxq %rbp,%r12 |
1026 | |
1027 | mulxq %r8,%rcx,%rbp |
1028 | movq %r15,%rdx |
1029 | adcxq %rcx,%r12 |
1030 | adoxq %rbp,%r13 |
1031 | adcq $0,%r13 |
1032 | |
1033 | mulxq %r8,%rcx,%r14 |
1034 | movq %rax,%rdx |
.byte 102,73,15,110,216 # movq %r8,%xmm3
1036 | xorq %r15,%r15 |
1037 | adcxq %r9,%r9 |
1038 | adoxq %rcx,%r13 |
1039 | adcxq %r10,%r10 |
1040 | adoxq %r15,%r14 |
1041 | |
1042 | |
1043 | mulxq %rdx,%r8,%rbp |
.byte 102,72,15,126,202 # movq %xmm1,%rdx
1045 | adcxq %r11,%r11 |
1046 | adoxq %rbp,%r9 |
1047 | adcxq %r12,%r12 |
1048 | mulxq %rdx,%rcx,%rax |
.byte 102,72,15,126,210 # movq %xmm2,%rdx
1050 | adcxq %r13,%r13 |
1051 | adoxq %rcx,%r10 |
1052 | adcxq %r14,%r14 |
1053 | mulxq %rdx,%rcx,%rbp |
.byte 0x67 # address-size prefix, apparently instruction padding
.byte 102,72,15,126,218 # movq %xmm3,%rdx
1056 | adoxq %rax,%r11 |
1057 | adcxq %r15,%r15 |
1058 | adoxq %rcx,%r12 |
1059 | adoxq %rbp,%r13 |
1060 | mulxq %rdx,%rcx,%rax |
1061 | adoxq %rcx,%r14 |
1062 | adoxq %rax,%r15 |
1063 | |
1064 | |
1065 | movq %r8,%rdx |
1066 | mulxq 32(%rsi),%rdx,%rcx |
1067 | |
1068 | xorq %rax,%rax |
1069 | mulxq 0(%rsi),%rcx,%rbp |
1070 | adcxq %rcx,%r8 |
1071 | adoxq %rbp,%r9 |
1072 | mulxq 8(%rsi),%rcx,%rbp |
1073 | adcxq %rcx,%r9 |
1074 | adoxq %rbp,%r10 |
1075 | mulxq 16(%rsi),%rcx,%rbp |
1076 | adcxq %rcx,%r10 |
1077 | adoxq %rbp,%r11 |
1078 | mulxq 24(%rsi),%rcx,%rbp |
1079 | adcxq %rcx,%r11 |
1080 | adoxq %rbp,%r8 |
1081 | adcxq %rax,%r8 |
1082 | |
1083 | |
1084 | movq %r9,%rdx |
1085 | mulxq 32(%rsi),%rdx,%rcx |
1086 | |
1087 | mulxq 0(%rsi),%rcx,%rbp |
1088 | adoxq %rcx,%r9 |
1089 | adcxq %rbp,%r10 |
1090 | mulxq 8(%rsi),%rcx,%rbp |
1091 | adoxq %rcx,%r10 |
1092 | adcxq %rbp,%r11 |
1093 | mulxq 16(%rsi),%rcx,%rbp |
1094 | adoxq %rcx,%r11 |
1095 | adcxq %rbp,%r8 |
1096 | mulxq 24(%rsi),%rcx,%rbp |
1097 | adoxq %rcx,%r8 |
1098 | adcxq %rbp,%r9 |
1099 | adoxq %rax,%r9 |
1100 | |
1101 | |
1102 | movq %r10,%rdx |
1103 | mulxq 32(%rsi),%rdx,%rcx |
1104 | |
1105 | mulxq 0(%rsi),%rcx,%rbp |
1106 | adcxq %rcx,%r10 |
1107 | adoxq %rbp,%r11 |
1108 | mulxq 8(%rsi),%rcx,%rbp |
1109 | adcxq %rcx,%r11 |
1110 | adoxq %rbp,%r8 |
1111 | mulxq 16(%rsi),%rcx,%rbp |
1112 | adcxq %rcx,%r8 |
1113 | adoxq %rbp,%r9 |
1114 | mulxq 24(%rsi),%rcx,%rbp |
1115 | adcxq %rcx,%r9 |
1116 | adoxq %rbp,%r10 |
1117 | adcxq %rax,%r10 |
1118 | |
1119 | |
1120 | movq %r11,%rdx |
1121 | mulxq 32(%rsi),%rdx,%rcx |
1122 | |
1123 | mulxq 0(%rsi),%rcx,%rbp |
1124 | adoxq %rcx,%r11 |
1125 | adcxq %rbp,%r8 |
1126 | mulxq 8(%rsi),%rcx,%rbp |
1127 | adoxq %rcx,%r8 |
1128 | adcxq %rbp,%r9 |
1129 | mulxq 16(%rsi),%rcx,%rbp |
1130 | adoxq %rcx,%r9 |
1131 | adcxq %rbp,%r10 |
1132 | mulxq 24(%rsi),%rcx,%rbp |
1133 | adoxq %rcx,%r10 |
1134 | adcxq %rbp,%r11 |
1135 | adoxq %rax,%r11 |
1136 | |
1137 | |
1138 | addq %r8,%r12 |
1139 | adcq %r13,%r9 |
1140 | movq %r12,%rdx |
1141 | adcq %r14,%r10 |
1142 | adcq %r15,%r11 |
1143 | movq %r9,%r14 |
1144 | adcq $0,%rax |
1145 | |
1146 | |
1147 | subq 0(%rsi),%r12 |
1148 | movq %r10,%r15 |
1149 | sbbq 8(%rsi),%r9 |
1150 | sbbq 16(%rsi),%r10 |
1151 | movq %r11,%r8 |
1152 | sbbq 24(%rsi),%r11 |
1153 | sbbq $0,%rax |
1154 | |
1155 | cmovncq %r12,%rdx |
1156 | cmovncq %r9,%r14 |
1157 | cmovncq %r10,%r15 |
1158 | cmovncq %r11,%r8 |
1159 | |
1160 | decq %rbx |
1161 | jnz .Loop_ord_sqrx |
1162 | |
1163 | movq %rdx,0(%rdi) |
1164 | movq %r14,8(%rdi) |
1165 | pxor %xmm1,%xmm1 |
1166 | movq %r15,16(%rdi) |
1167 | pxor %xmm2,%xmm2 |
1168 | movq %r8,24(%rdi) |
1169 | pxor %xmm3,%xmm3 |
1170 | |
1171 | movq 0(%rsp),%r15 |
1172 | .cfi_restore %r15 |
1173 | movq 8(%rsp),%r14 |
1174 | .cfi_restore %r14 |
1175 | movq 16(%rsp),%r13 |
1176 | .cfi_restore %r13 |
1177 | movq 24(%rsp),%r12 |
1178 | .cfi_restore %r12 |
1179 | movq 32(%rsp),%rbx |
1180 | .cfi_restore %rbx |
1181 | movq 40(%rsp),%rbp |
1182 | .cfi_restore %rbp |
1183 | leaq 48(%rsp),%rsp |
1184 | .cfi_adjust_cfa_offset -48 |
1185 | .Lord_sqrx_epilogue: |
1186 | .byte 0xf3,0xc3 |
1187 | .cfi_endproc |
1188 | .size ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx |
1189 | |
1190 | |
1191 | |
1192 | |
1193 | |
1194 | |
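# ecp_nistz256_mul_mont: res = a*b*2^-256 mod p. %rdi = res, %rsi = a,
# %rdx = b. Dispatches to the mulq or mulx inner routine on the same
# BMI2+ADX test as above.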
1195 | .globl ecp_nistz256_mul_mont |
1196 | .hidden ecp_nistz256_mul_mont |
1197 | .type ecp_nistz256_mul_mont,@function |
1198 | .align 32 |
1199 | ecp_nistz256_mul_mont: |
1200 | .cfi_startproc |
1201 | leaq OPENSSL_ia32cap_P(%rip),%rcx |
1202 | movq 8(%rcx),%rcx |
1203 | andl $0x80100,%ecx |
1204 | .Lmul_mont: |
1205 | pushq %rbp |
1206 | .cfi_adjust_cfa_offset 8 |
1207 | .cfi_offset %rbp,-16 |
1208 | pushq %rbx |
1209 | .cfi_adjust_cfa_offset 8 |
1210 | .cfi_offset %rbx,-24 |
1211 | pushq %r12 |
1212 | .cfi_adjust_cfa_offset 8 |
1213 | .cfi_offset %r12,-32 |
1214 | pushq %r13 |
1215 | .cfi_adjust_cfa_offset 8 |
1216 | .cfi_offset %r13,-40 |
1217 | pushq %r14 |
1218 | .cfi_adjust_cfa_offset 8 |
1219 | .cfi_offset %r14,-48 |
1220 | pushq %r15 |
1221 | .cfi_adjust_cfa_offset 8 |
1222 | .cfi_offset %r15,-56 |
1223 | .Lmul_body: |
1224 | cmpl $0x80100,%ecx |
1225 | je .Lmul_montx |
1226 | movq %rdx,%rbx |
1227 | movq 0(%rdx),%rax |
1228 | movq 0(%rsi),%r9 |
1229 | movq 8(%rsi),%r10 |
1230 | movq 16(%rsi),%r11 |
1231 | movq 24(%rsi),%r12 |
1232 | |
1233 | call __ecp_nistz256_mul_montq |
1234 | jmp .Lmul_mont_done |
1235 | |
1236 | .align 32 |
1237 | .Lmul_montx: |
1238 | movq %rdx,%rbx |
1239 | movq 0(%rdx),%rdx |
1240 | movq 0(%rsi),%r9 |
1241 | movq 8(%rsi),%r10 |
1242 | movq 16(%rsi),%r11 |
1243 | movq 24(%rsi),%r12 |
1244 | leaq -128(%rsi),%rsi |
1245 | |
1246 | call __ecp_nistz256_mul_montx |
1247 | .Lmul_mont_done: |
1248 | movq 0(%rsp),%r15 |
1249 | .cfi_restore %r15 |
1250 | movq 8(%rsp),%r14 |
1251 | .cfi_restore %r14 |
1252 | movq 16(%rsp),%r13 |
1253 | .cfi_restore %r13 |
1254 | movq 24(%rsp),%r12 |
1255 | .cfi_restore %r12 |
1256 | movq 32(%rsp),%rbx |
1257 | .cfi_restore %rbx |
1258 | movq 40(%rsp),%rbp |
1259 | .cfi_restore %rbp |
1260 | leaq 48(%rsp),%rsp |
1261 | .cfi_adjust_cfa_offset -48 |
1262 | .Lmul_epilogue: |
1263 | .byte 0xf3,0xc3 |
1264 | .cfi_endproc |
1265 | .size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont |
1266 | |
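# Inner Montgomery multiplication mod p.
# In: %rsi = a, %rbx = b, %rax = b[0], %r9-%r12 = a[0..3].
# Out: result stored to (%rdi) and left in %r12,%r13,%r8,%r9.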
1267 | .type __ecp_nistz256_mul_montq,@function |
1268 | .align 32 |
1269 | __ecp_nistz256_mul_montq: |
1270 | .cfi_startproc |
1271 | |
1272 | |
1273 | movq %rax,%rbp |
1274 | mulq %r9 |
1275 | movq .Lpoly+8(%rip),%r14 |
1276 | movq %rax,%r8 |
1277 | movq %rbp,%rax |
1278 | movq %rdx,%r9 |
1279 | |
1280 | mulq %r10 |
1281 | movq .Lpoly+24(%rip),%r15 |
1282 | addq %rax,%r9 |
1283 | movq %rbp,%rax |
1284 | adcq $0,%rdx |
1285 | movq %rdx,%r10 |
1286 | |
1287 | mulq %r11 |
1288 | addq %rax,%r10 |
1289 | movq %rbp,%rax |
1290 | adcq $0,%rdx |
1291 | movq %rdx,%r11 |
1292 | |
1293 | mulq %r12 |
1294 | addq %rax,%r11 |
1295 | movq %r8,%rax |
1296 | adcq $0,%rdx |
1297 | xorq %r13,%r13 |
1298 | movq %rdx,%r12 |
1299 | |
1300 | |
1301 | |
1302 | |
1303 | |
1304 | |
1305 | |
1306 | |
1307 | |
1308 | |
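# First reduction step. p = 2^256 - 2^224 + 2^192 + 2^96 - 1, so
# -p^(-1) mod 2^64 = 1 and the Montgomery factor is u = acc0 itself.
# Adding u*p zeroes the low limb; it reduces to adding u<<32 across limbs
# 1-2 (the shl/shr pair) and u*.Lpoly[3] across limbs 3-4 (mulq %r15).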
1309 | movq %r8,%rbp |
1310 | shlq $32,%r8 |
1311 | mulq %r15 |
1312 | shrq $32,%rbp |
1313 | addq %r8,%r9 |
1314 | adcq %rbp,%r10 |
1315 | adcq %rax,%r11 |
1316 | movq 8(%rbx),%rax |
1317 | adcq %rdx,%r12 |
1318 | adcq $0,%r13 |
1319 | xorq %r8,%r8 |
1320 | |
1321 | |
1322 | |
1323 | movq %rax,%rbp |
1324 | mulq 0(%rsi) |
1325 | addq %rax,%r9 |
1326 | movq %rbp,%rax |
1327 | adcq $0,%rdx |
1328 | movq %rdx,%rcx |
1329 | |
1330 | mulq 8(%rsi) |
1331 | addq %rcx,%r10 |
1332 | adcq $0,%rdx |
1333 | addq %rax,%r10 |
1334 | movq %rbp,%rax |
1335 | adcq $0,%rdx |
1336 | movq %rdx,%rcx |
1337 | |
1338 | mulq 16(%rsi) |
1339 | addq %rcx,%r11 |
1340 | adcq $0,%rdx |
1341 | addq %rax,%r11 |
1342 | movq %rbp,%rax |
1343 | adcq $0,%rdx |
1344 | movq %rdx,%rcx |
1345 | |
1346 | mulq 24(%rsi) |
1347 | addq %rcx,%r12 |
1348 | adcq $0,%rdx |
1349 | addq %rax,%r12 |
1350 | movq %r9,%rax |
1351 | adcq %rdx,%r13 |
1352 | adcq $0,%r8 |
1353 | |
1354 | |
1355 | |
1356 | movq %r9,%rbp |
1357 | shlq $32,%r9 |
1358 | mulq %r15 |
1359 | shrq $32,%rbp |
1360 | addq %r9,%r10 |
1361 | adcq %rbp,%r11 |
1362 | adcq %rax,%r12 |
1363 | movq 16(%rbx),%rax |
1364 | adcq %rdx,%r13 |
1365 | adcq $0,%r8 |
1366 | xorq %r9,%r9 |
1367 | |
1368 | |
1369 | |
1370 | movq %rax,%rbp |
1371 | mulq 0(%rsi) |
1372 | addq %rax,%r10 |
1373 | movq %rbp,%rax |
1374 | adcq $0,%rdx |
1375 | movq %rdx,%rcx |
1376 | |
1377 | mulq 8(%rsi) |
1378 | addq %rcx,%r11 |
1379 | adcq $0,%rdx |
1380 | addq %rax,%r11 |
1381 | movq %rbp,%rax |
1382 | adcq $0,%rdx |
1383 | movq %rdx,%rcx |
1384 | |
1385 | mulq 16(%rsi) |
1386 | addq %rcx,%r12 |
1387 | adcq $0,%rdx |
1388 | addq %rax,%r12 |
1389 | movq %rbp,%rax |
1390 | adcq $0,%rdx |
1391 | movq %rdx,%rcx |
1392 | |
1393 | mulq 24(%rsi) |
1394 | addq %rcx,%r13 |
1395 | adcq $0,%rdx |
1396 | addq %rax,%r13 |
1397 | movq %r10,%rax |
1398 | adcq %rdx,%r8 |
1399 | adcq $0,%r9 |
1400 | |
1401 | |
1402 | |
1403 | movq %r10,%rbp |
1404 | shlq $32,%r10 |
1405 | mulq %r15 |
1406 | shrq $32,%rbp |
1407 | addq %r10,%r11 |
1408 | adcq %rbp,%r12 |
1409 | adcq %rax,%r13 |
1410 | movq 24(%rbx),%rax |
1411 | adcq %rdx,%r8 |
1412 | adcq $0,%r9 |
1413 | xorq %r10,%r10 |
1414 | |
1415 | |
1416 | |
1417 | movq %rax,%rbp |
1418 | mulq 0(%rsi) |
1419 | addq %rax,%r11 |
1420 | movq %rbp,%rax |
1421 | adcq $0,%rdx |
1422 | movq %rdx,%rcx |
1423 | |
1424 | mulq 8(%rsi) |
1425 | addq %rcx,%r12 |
1426 | adcq $0,%rdx |
1427 | addq %rax,%r12 |
1428 | movq %rbp,%rax |
1429 | adcq $0,%rdx |
1430 | movq %rdx,%rcx |
1431 | |
1432 | mulq 16(%rsi) |
1433 | addq %rcx,%r13 |
1434 | adcq $0,%rdx |
1435 | addq %rax,%r13 |
1436 | movq %rbp,%rax |
1437 | adcq $0,%rdx |
1438 | movq %rdx,%rcx |
1439 | |
1440 | mulq 24(%rsi) |
1441 | addq %rcx,%r8 |
1442 | adcq $0,%rdx |
1443 | addq %rax,%r8 |
1444 | movq %r11,%rax |
1445 | adcq %rdx,%r9 |
1446 | adcq $0,%r10 |
1447 | |
1448 | |
1449 | |
1450 | movq %r11,%rbp |
1451 | shlq $32,%r11 |
1452 | mulq %r15 |
1453 | shrq $32,%rbp |
1454 | addq %r11,%r12 |
1455 | adcq %rbp,%r13 |
1456 | movq %r12,%rcx |
1457 | adcq %rax,%r8 |
1458 | adcq %rdx,%r9 |
1459 | movq %r13,%rbp |
1460 | adcq $0,%r10 |
1461 | |
1462 | |
1463 | |
1464 | subq $-1,%r12 |
1465 | movq %r8,%rbx |
1466 | sbbq %r14,%r13 |
1467 | sbbq $0,%r8 |
1468 | movq %r9,%rdx |
1469 | sbbq %r15,%r9 |
1470 | sbbq $0,%r10 |
1471 | |
1472 | cmovcq %rcx,%r12 |
1473 | cmovcq %rbp,%r13 |
1474 | movq %r12,0(%rdi) |
1475 | cmovcq %rbx,%r8 |
1476 | movq %r13,8(%rdi) |
1477 | cmovcq %rdx,%r9 |
1478 | movq %r8,16(%rdi) |
1479 | movq %r9,24(%rdi) |
1480 | |
1481 | .byte 0xf3,0xc3 |
1482 | .cfi_endproc |
1483 | .size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq |
1484 | |
1485 | |
1486 | |
1487 | |
1488 | |
1489 | |
1490 | |
1491 | |
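# ecp_nistz256_sqr_mont: res = a*a*2^-256 mod p. %rdi = res, %rsi = a.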
1492 | .globl ecp_nistz256_sqr_mont |
1493 | .hidden ecp_nistz256_sqr_mont |
1494 | .type ecp_nistz256_sqr_mont,@function |
1495 | .align 32 |
1496 | ecp_nistz256_sqr_mont: |
1497 | .cfi_startproc |
1498 | leaq OPENSSL_ia32cap_P(%rip),%rcx |
1499 | movq 8(%rcx),%rcx |
1500 | andl $0x80100,%ecx |
1501 | pushq %rbp |
1502 | .cfi_adjust_cfa_offset 8 |
1503 | .cfi_offset %rbp,-16 |
1504 | pushq %rbx |
1505 | .cfi_adjust_cfa_offset 8 |
1506 | .cfi_offset %rbx,-24 |
1507 | pushq %r12 |
1508 | .cfi_adjust_cfa_offset 8 |
1509 | .cfi_offset %r12,-32 |
1510 | pushq %r13 |
1511 | .cfi_adjust_cfa_offset 8 |
1512 | .cfi_offset %r13,-40 |
1513 | pushq %r14 |
1514 | .cfi_adjust_cfa_offset 8 |
1515 | .cfi_offset %r14,-48 |
1516 | pushq %r15 |
1517 | .cfi_adjust_cfa_offset 8 |
1518 | .cfi_offset %r15,-56 |
1519 | .Lsqr_body: |
1520 | cmpl $0x80100,%ecx |
1521 | je .Lsqr_montx |
1522 | movq 0(%rsi),%rax |
1523 | movq 8(%rsi),%r14 |
1524 | movq 16(%rsi),%r15 |
1525 | movq 24(%rsi),%r8 |
1526 | |
1527 | call __ecp_nistz256_sqr_montq |
1528 | jmp .Lsqr_mont_done |
1529 | |
1530 | .align 32 |
1531 | .Lsqr_montx: |
1532 | movq 0(%rsi),%rdx |
1533 | movq 8(%rsi),%r14 |
1534 | movq 16(%rsi),%r15 |
1535 | movq 24(%rsi),%r8 |
1536 | leaq -128(%rsi),%rsi |
1537 | |
1538 | call __ecp_nistz256_sqr_montx |
1539 | .Lsqr_mont_done: |
1540 | movq 0(%rsp),%r15 |
1541 | .cfi_restore %r15 |
1542 | movq 8(%rsp),%r14 |
1543 | .cfi_restore %r14 |
1544 | movq 16(%rsp),%r13 |
1545 | .cfi_restore %r13 |
1546 | movq 24(%rsp),%r12 |
1547 | .cfi_restore %r12 |
1548 | movq 32(%rsp),%rbx |
1549 | .cfi_restore %rbx |
1550 | movq 40(%rsp),%rbp |
1551 | .cfi_restore %rbp |
1552 | leaq 48(%rsp),%rsp |
1553 | .cfi_adjust_cfa_offset -48 |
1554 | .Lsqr_epilogue: |
1555 | .byte 0xf3,0xc3 |
1556 | .cfi_endproc |
1557 | .size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont |
1558 | |
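# Inner Montgomery squaring mod p.
# In: %rsi = a, %rax = a[0], %r14,%r15,%r8 = a[1..3].
# Out: result stored to (%rdi) and left in %r12-%r15; %rsi and %rbp exit
# holding .Lpoly[1] and .Lpoly[3], which ecp_nistz256_point_double reuses.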
1559 | .type __ecp_nistz256_sqr_montq,@function |
1560 | .align 32 |
1561 | __ecp_nistz256_sqr_montq: |
1562 | .cfi_startproc |
1563 | movq %rax,%r13 |
1564 | mulq %r14 |
1565 | movq %rax,%r9 |
1566 | movq %r15,%rax |
1567 | movq %rdx,%r10 |
1568 | |
1569 | mulq %r13 |
1570 | addq %rax,%r10 |
1571 | movq %r8,%rax |
1572 | adcq $0,%rdx |
1573 | movq %rdx,%r11 |
1574 | |
1575 | mulq %r13 |
1576 | addq %rax,%r11 |
1577 | movq %r15,%rax |
1578 | adcq $0,%rdx |
1579 | movq %rdx,%r12 |
1580 | |
1581 | |
1582 | mulq %r14 |
1583 | addq %rax,%r11 |
1584 | movq %r8,%rax |
1585 | adcq $0,%rdx |
1586 | movq %rdx,%rbp |
1587 | |
1588 | mulq %r14 |
1589 | addq %rax,%r12 |
1590 | movq %r8,%rax |
1591 | adcq $0,%rdx |
1592 | addq %rbp,%r12 |
1593 | movq %rdx,%r13 |
1594 | adcq $0,%r13 |
1595 | |
1596 | |
1597 | mulq %r15 |
1598 | xorq %r15,%r15 |
1599 | addq %rax,%r13 |
1600 | movq 0(%rsi),%rax |
1601 | movq %rdx,%r14 |
1602 | adcq $0,%r14 |
1603 | |
1604 | addq %r9,%r9 |
1605 | adcq %r10,%r10 |
1606 | adcq %r11,%r11 |
1607 | adcq %r12,%r12 |
1608 | adcq %r13,%r13 |
1609 | adcq %r14,%r14 |
1610 | adcq $0,%r15 |
1611 | |
1612 | mulq %rax |
1613 | movq %rax,%r8 |
1614 | movq 8(%rsi),%rax |
1615 | movq %rdx,%rcx |
1616 | |
1617 | mulq %rax |
1618 | addq %rcx,%r9 |
1619 | adcq %rax,%r10 |
1620 | movq 16(%rsi),%rax |
1621 | adcq $0,%rdx |
1622 | movq %rdx,%rcx |
1623 | |
1624 | mulq %rax |
1625 | addq %rcx,%r11 |
1626 | adcq %rax,%r12 |
1627 | movq 24(%rsi),%rax |
1628 | adcq $0,%rdx |
1629 | movq %rdx,%rcx |
1630 | |
1631 | mulq %rax |
1632 | addq %rcx,%r13 |
1633 | adcq %rax,%r14 |
1634 | movq %r8,%rax |
1635 | adcq %rdx,%r15 |
1636 | |
1637 | movq .Lpoly+8(%rip),%rsi |
1638 | movq .Lpoly+24(%rip),%rbp |
1639 | |
1640 | |
1641 | |
1642 | |
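# Four reduction steps, each folding u = acc_low via the u<<32 shift pair
# and one mulq by .Lpoly[3] (in %rbp).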
1643 | movq %r8,%rcx |
1644 | shlq $32,%r8 |
1645 | mulq %rbp |
1646 | shrq $32,%rcx |
1647 | addq %r8,%r9 |
1648 | adcq %rcx,%r10 |
1649 | adcq %rax,%r11 |
1650 | movq %r9,%rax |
1651 | adcq $0,%rdx |
1652 | |
1653 | |
1654 | |
1655 | movq %r9,%rcx |
1656 | shlq $32,%r9 |
1657 | movq %rdx,%r8 |
1658 | mulq %rbp |
1659 | shrq $32,%rcx |
1660 | addq %r9,%r10 |
1661 | adcq %rcx,%r11 |
1662 | adcq %rax,%r8 |
1663 | movq %r10,%rax |
1664 | adcq $0,%rdx |
1665 | |
1666 | |
1667 | |
1668 | movq %r10,%rcx |
1669 | shlq $32,%r10 |
1670 | movq %rdx,%r9 |
1671 | mulq %rbp |
1672 | shrq $32,%rcx |
1673 | addq %r10,%r11 |
1674 | adcq %rcx,%r8 |
1675 | adcq %rax,%r9 |
1676 | movq %r11,%rax |
1677 | adcq $0,%rdx |
1678 | |
1679 | |
1680 | |
1681 | movq %r11,%rcx |
1682 | shlq $32,%r11 |
1683 | movq %rdx,%r10 |
1684 | mulq %rbp |
1685 | shrq $32,%rcx |
1686 | addq %r11,%r8 |
1687 | adcq %rcx,%r9 |
1688 | adcq %rax,%r10 |
1689 | adcq $0,%rdx |
1690 | xorq %r11,%r11 |
1691 | |
1692 | |
1693 | |
1694 | addq %r8,%r12 |
1695 | adcq %r9,%r13 |
1696 | movq %r12,%r8 |
1697 | adcq %r10,%r14 |
1698 | adcq %rdx,%r15 |
1699 | movq %r13,%r9 |
1700 | adcq $0,%r11 |
1701 | |
1702 | subq $-1,%r12 |
1703 | movq %r14,%r10 |
1704 | sbbq %rsi,%r13 |
1705 | sbbq $0,%r14 |
1706 | movq %r15,%rcx |
1707 | sbbq %rbp,%r15 |
1708 | sbbq $0,%r11 |
1709 | |
1710 | cmovcq %r8,%r12 |
1711 | cmovcq %r9,%r13 |
1712 | movq %r12,0(%rdi) |
1713 | cmovcq %r10,%r14 |
1714 | movq %r13,8(%rdi) |
1715 | cmovcq %rcx,%r15 |
1716 | movq %r14,16(%rdi) |
1717 | movq %r15,24(%rdi) |
1718 | |
1719 | .byte 0xf3,0xc3 |
1720 | .cfi_endproc |
1721 | .size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq |
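# BMI2/ADX inner multiplication mod p.
# In: %rsi = a-128, %rbx = b, %rdx = b[0], %r9-%r12 = a[0..3].
# Out: result stored to (%rdi) and left in %r12,%r13,%r8,%r9.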
1722 | .type __ecp_nistz256_mul_montx,@function |
1723 | .align 32 |
1724 | __ecp_nistz256_mul_montx: |
1725 | .cfi_startproc |
1726 | |
1727 | |
1728 | mulxq %r9,%r8,%r9 |
1729 | mulxq %r10,%rcx,%r10 |
1730 | movq $32,%r14 |
1731 | xorq %r13,%r13 |
1732 | mulxq %r11,%rbp,%r11 |
1733 | movq .Lpoly+24(%rip),%r15 |
1734 | adcq %rcx,%r9 |
1735 | mulxq %r12,%rcx,%r12 |
1736 | movq %r8,%rdx |
1737 | adcq %rbp,%r10 |
1738 | shlxq %r14,%r8,%rbp |
1739 | adcq %rcx,%r11 |
1740 | shrxq %r14,%r8,%rcx |
1741 | adcq $0,%r12 |
1742 | |
1743 | |
1744 | |
1745 | addq %rbp,%r9 |
1746 | adcq %rcx,%r10 |
1747 | |
1748 | mulxq %r15,%rcx,%rbp |
1749 | movq 8(%rbx),%rdx |
1750 | adcq %rcx,%r11 |
1751 | adcq %rbp,%r12 |
1752 | adcq $0,%r13 |
1753 | xorq %r8,%r8 |
1754 | |
1755 | |
1756 | |
1757 | mulxq 0+128(%rsi),%rcx,%rbp |
1758 | adcxq %rcx,%r9 |
1759 | adoxq %rbp,%r10 |
1760 | |
1761 | mulxq 8+128(%rsi),%rcx,%rbp |
1762 | adcxq %rcx,%r10 |
1763 | adoxq %rbp,%r11 |
1764 | |
1765 | mulxq 16+128(%rsi),%rcx,%rbp |
1766 | adcxq %rcx,%r11 |
1767 | adoxq %rbp,%r12 |
1768 | |
1769 | mulxq 24+128(%rsi),%rcx,%rbp |
1770 | movq %r9,%rdx |
1771 | adcxq %rcx,%r12 |
1772 | shlxq %r14,%r9,%rcx |
1773 | adoxq %rbp,%r13 |
1774 | shrxq %r14,%r9,%rbp |
1775 | |
1776 | adcxq %r8,%r13 |
1777 | adoxq %r8,%r8 |
1778 | adcq $0,%r8 |
1779 | |
1780 | |
1781 | |
1782 | addq %rcx,%r10 |
1783 | adcq %rbp,%r11 |
1784 | |
1785 | mulxq %r15,%rcx,%rbp |
1786 | movq 16(%rbx),%rdx |
1787 | adcq %rcx,%r12 |
1788 | adcq %rbp,%r13 |
1789 | adcq $0,%r8 |
1790 | xorq %r9,%r9 |
1791 | |
1792 | |
1793 | |
1794 | mulxq 0+128(%rsi),%rcx,%rbp |
1795 | adcxq %rcx,%r10 |
1796 | adoxq %rbp,%r11 |
1797 | |
1798 | mulxq 8+128(%rsi),%rcx,%rbp |
1799 | adcxq %rcx,%r11 |
1800 | adoxq %rbp,%r12 |
1801 | |
1802 | mulxq 16+128(%rsi),%rcx,%rbp |
1803 | adcxq %rcx,%r12 |
1804 | adoxq %rbp,%r13 |
1805 | |
1806 | mulxq 24+128(%rsi),%rcx,%rbp |
1807 | movq %r10,%rdx |
1808 | adcxq %rcx,%r13 |
1809 | shlxq %r14,%r10,%rcx |
1810 | adoxq %rbp,%r8 |
1811 | shrxq %r14,%r10,%rbp |
1812 | |
1813 | adcxq %r9,%r8 |
1814 | adoxq %r9,%r9 |
1815 | adcq $0,%r9 |
1816 | |
1817 | |
1818 | |
1819 | addq %rcx,%r11 |
1820 | adcq %rbp,%r12 |
1821 | |
1822 | mulxq %r15,%rcx,%rbp |
1823 | movq 24(%rbx),%rdx |
1824 | adcq %rcx,%r13 |
1825 | adcq %rbp,%r8 |
1826 | adcq $0,%r9 |
1827 | xorq %r10,%r10 |
1828 | |
1829 | |
1830 | |
1831 | mulxq 0+128(%rsi),%rcx,%rbp |
1832 | adcxq %rcx,%r11 |
1833 | adoxq %rbp,%r12 |
1834 | |
1835 | mulxq 8+128(%rsi),%rcx,%rbp |
1836 | adcxq %rcx,%r12 |
1837 | adoxq %rbp,%r13 |
1838 | |
1839 | mulxq 16+128(%rsi),%rcx,%rbp |
1840 | adcxq %rcx,%r13 |
1841 | adoxq %rbp,%r8 |
1842 | |
1843 | mulxq 24+128(%rsi),%rcx,%rbp |
1844 | movq %r11,%rdx |
1845 | adcxq %rcx,%r8 |
1846 | shlxq %r14,%r11,%rcx |
1847 | adoxq %rbp,%r9 |
1848 | shrxq %r14,%r11,%rbp |
1849 | |
1850 | adcxq %r10,%r9 |
1851 | adoxq %r10,%r10 |
1852 | adcq $0,%r10 |
1853 | |
1854 | |
1855 | |
1856 | addq %rcx,%r12 |
1857 | adcq %rbp,%r13 |
1858 | |
1859 | mulxq %r15,%rcx,%rbp |
1860 | movq %r12,%rbx |
1861 | movq .Lpoly+8(%rip),%r14 |
1862 | adcq %rcx,%r8 |
1863 | movq %r13,%rdx |
1864 | adcq %rbp,%r9 |
1865 | adcq $0,%r10 |
1866 | |
1867 | |
1868 | |
1869 | xorl %eax,%eax |
1870 | movq %r8,%rcx |
1871 | sbbq $-1,%r12 |
1872 | sbbq %r14,%r13 |
1873 | sbbq $0,%r8 |
1874 | movq %r9,%rbp |
1875 | sbbq %r15,%r9 |
1876 | sbbq $0,%r10 |
1877 | |
1878 | cmovcq %rbx,%r12 |
1879 | cmovcq %rdx,%r13 |
1880 | movq %r12,0(%rdi) |
1881 | cmovcq %rcx,%r8 |
1882 | movq %r13,8(%rdi) |
1883 | cmovcq %rbp,%r9 |
1884 | movq %r8,16(%rdi) |
1885 | movq %r9,24(%rdi) |
1886 | |
1887 | .byte 0xf3,0xc3 |
1888 | .cfi_endproc |
1889 | .size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx |
1890 | |
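# BMI2/ADX inner squaring mod p.
# In: %rsi = a-128, %rdx = a[0], %r14,%r15,%r8 = a[1..3].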
1891 | .type __ecp_nistz256_sqr_montx,@function |
1892 | .align 32 |
1893 | __ecp_nistz256_sqr_montx: |
1894 | .cfi_startproc |
1895 | mulxq %r14,%r9,%r10 |
1896 | mulxq %r15,%rcx,%r11 |
1897 | xorl %eax,%eax |
1898 | adcq %rcx,%r10 |
1899 | mulxq %r8,%rbp,%r12 |
1900 | movq %r14,%rdx |
1901 | adcq %rbp,%r11 |
1902 | adcq $0,%r12 |
1903 | xorq %r13,%r13 |
1904 | |
1905 | |
1906 | mulxq %r15,%rcx,%rbp |
1907 | adcxq %rcx,%r11 |
1908 | adoxq %rbp,%r12 |
1909 | |
1910 | mulxq %r8,%rcx,%rbp |
1911 | movq %r15,%rdx |
1912 | adcxq %rcx,%r12 |
1913 | adoxq %rbp,%r13 |
1914 | adcq $0,%r13 |
1915 | |
1916 | |
1917 | mulxq %r8,%rcx,%r14 |
1918 | movq 0+128(%rsi),%rdx |
1919 | xorq %r15,%r15 |
1920 | adcxq %r9,%r9 |
1921 | adoxq %rcx,%r13 |
1922 | adcxq %r10,%r10 |
1923 | adoxq %r15,%r14 |
1924 | |
1925 | mulxq %rdx,%r8,%rbp |
1926 | movq 8+128(%rsi),%rdx |
1927 | adcxq %r11,%r11 |
1928 | adoxq %rbp,%r9 |
1929 | adcxq %r12,%r12 |
1930 | mulxq %rdx,%rcx,%rax |
1931 | movq 16+128(%rsi),%rdx |
1932 | adcxq %r13,%r13 |
1933 | adoxq %rcx,%r10 |
1934 | adcxq %r14,%r14 |
.byte 0x67 # address-size prefix, apparently instruction padding
1936 | mulxq %rdx,%rcx,%rbp |
1937 | movq 24+128(%rsi),%rdx |
1938 | adoxq %rax,%r11 |
1939 | adcxq %r15,%r15 |
1940 | adoxq %rcx,%r12 |
1941 | movq $32,%rsi |
1942 | adoxq %rbp,%r13 |
.byte 0x67,0x67 # address-size prefixes, apparently instruction padding
1944 | mulxq %rdx,%rcx,%rax |
1945 | movq .Lpoly+24(%rip),%rdx |
1946 | adoxq %rcx,%r14 |
1947 | shlxq %rsi,%r8,%rcx |
1948 | adoxq %rax,%r15 |
1949 | shrxq %rsi,%r8,%rax |
1950 | movq %rdx,%rbp |
1951 | |
1952 | |
1953 | addq %rcx,%r9 |
1954 | adcq %rax,%r10 |
1955 | |
1956 | mulxq %r8,%rcx,%r8 |
1957 | adcq %rcx,%r11 |
1958 | shlxq %rsi,%r9,%rcx |
1959 | adcq $0,%r8 |
1960 | shrxq %rsi,%r9,%rax |
1961 | |
1962 | |
1963 | addq %rcx,%r10 |
1964 | adcq %rax,%r11 |
1965 | |
1966 | mulxq %r9,%rcx,%r9 |
1967 | adcq %rcx,%r8 |
1968 | shlxq %rsi,%r10,%rcx |
1969 | adcq $0,%r9 |
1970 | shrxq %rsi,%r10,%rax |
1971 | |
1972 | |
1973 | addq %rcx,%r11 |
1974 | adcq %rax,%r8 |
1975 | |
1976 | mulxq %r10,%rcx,%r10 |
1977 | adcq %rcx,%r9 |
1978 | shlxq %rsi,%r11,%rcx |
1979 | adcq $0,%r10 |
1980 | shrxq %rsi,%r11,%rax |
1981 | |
1982 | |
1983 | addq %rcx,%r8 |
1984 | adcq %rax,%r9 |
1985 | |
1986 | mulxq %r11,%rcx,%r11 |
1987 | adcq %rcx,%r10 |
1988 | adcq $0,%r11 |
1989 | |
1990 | xorq %rdx,%rdx |
1991 | addq %r8,%r12 |
1992 | movq .Lpoly+8(%rip),%rsi |
1993 | adcq %r9,%r13 |
1994 | movq %r12,%r8 |
1995 | adcq %r10,%r14 |
1996 | adcq %r11,%r15 |
1997 | movq %r13,%r9 |
1998 | adcq $0,%rdx |
1999 | |
2000 | subq $-1,%r12 |
2001 | movq %r14,%r10 |
2002 | sbbq %rsi,%r13 |
2003 | sbbq $0,%r14 |
2004 | movq %r15,%r11 |
2005 | sbbq %rbp,%r15 |
2006 | sbbq $0,%rdx |
2007 | |
2008 | cmovcq %r8,%r12 |
2009 | cmovcq %r9,%r13 |
2010 | movq %r12,0(%rdi) |
2011 | cmovcq %r10,%r14 |
2012 | movq %r13,8(%rdi) |
2013 | cmovcq %r11,%r15 |
2014 | movq %r14,16(%rdi) |
2015 | movq %r15,24(%rdi) |
2016 | |
2017 | .byte 0xf3,0xc3 |
2018 | .cfi_endproc |
2019 | .size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx |
2020 | |
2021 | |
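# ecp_nistz256_select_w5: constant-time table lookup. %rsi points at 16
# Jacobian points of 96 bytes (X, Y, Z); %edx is a 1-based index. All 16
# entries are read and masked with pcmpeqd, so the memory trace is
# independent of the index; index 0 matches nothing and yields zeros.
# Jumps to the AVX2 version when bit 5 (AVX2) of CPUID.7:EBX is set.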
2022 | .globl ecp_nistz256_select_w5 |
2023 | .hidden ecp_nistz256_select_w5 |
2024 | .type ecp_nistz256_select_w5,@function |
2025 | .align 32 |
2026 | ecp_nistz256_select_w5: |
2027 | .cfi_startproc |
2028 | leaq OPENSSL_ia32cap_P(%rip),%rax |
2029 | movq 8(%rax),%rax |
2030 | testl $32,%eax |
2031 | jnz .Lavx2_select_w5 |
2032 | movdqa .LOne(%rip),%xmm0 |
2033 | movd %edx,%xmm1 |
2034 | |
2035 | pxor %xmm2,%xmm2 |
2036 | pxor %xmm3,%xmm3 |
2037 | pxor %xmm4,%xmm4 |
2038 | pxor %xmm5,%xmm5 |
2039 | pxor %xmm6,%xmm6 |
2040 | pxor %xmm7,%xmm7 |
2041 | |
2042 | movdqa %xmm0,%xmm8 |
2043 | pshufd $0,%xmm1,%xmm1 |
2044 | |
2045 | movq $16,%rax |
2046 | .Lselect_loop_sse_w5: |
2047 | |
2048 | movdqa %xmm8,%xmm15 |
2049 | paddd %xmm0,%xmm8 |
2050 | pcmpeqd %xmm1,%xmm15 |
2051 | |
2052 | movdqa 0(%rsi),%xmm9 |
2053 | movdqa 16(%rsi),%xmm10 |
2054 | movdqa 32(%rsi),%xmm11 |
2055 | movdqa 48(%rsi),%xmm12 |
2056 | movdqa 64(%rsi),%xmm13 |
2057 | movdqa 80(%rsi),%xmm14 |
2058 | leaq 96(%rsi),%rsi |
2059 | |
2060 | pand %xmm15,%xmm9 |
2061 | pand %xmm15,%xmm10 |
2062 | por %xmm9,%xmm2 |
2063 | pand %xmm15,%xmm11 |
2064 | por %xmm10,%xmm3 |
2065 | pand %xmm15,%xmm12 |
2066 | por %xmm11,%xmm4 |
2067 | pand %xmm15,%xmm13 |
2068 | por %xmm12,%xmm5 |
2069 | pand %xmm15,%xmm14 |
2070 | por %xmm13,%xmm6 |
2071 | por %xmm14,%xmm7 |
2072 | |
2073 | decq %rax |
2074 | jnz .Lselect_loop_sse_w5 |
2075 | |
2076 | movdqu %xmm2,0(%rdi) |
2077 | movdqu %xmm3,16(%rdi) |
2078 | movdqu %xmm4,32(%rdi) |
2079 | movdqu %xmm5,48(%rdi) |
2080 | movdqu %xmm6,64(%rdi) |
2081 | movdqu %xmm7,80(%rdi) |
2082 | .byte 0xf3,0xc3 |
2083 | .cfi_endproc |
2084 | .LSEH_end_ecp_nistz256_select_w5: |
2085 | .size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5 |
2086 | |
2087 | |
2088 | |
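# ecp_nistz256_select_w7: as select_w5, but over 64 affine points of
# 64 bytes (X, Y) each.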
2089 | .globl ecp_nistz256_select_w7 |
2090 | .hidden ecp_nistz256_select_w7 |
2091 | .type ecp_nistz256_select_w7,@function |
2092 | .align 32 |
2093 | ecp_nistz256_select_w7: |
2094 | .cfi_startproc |
2095 | leaq OPENSSL_ia32cap_P(%rip),%rax |
2096 | movq 8(%rax),%rax |
2097 | testl $32,%eax |
2098 | jnz .Lavx2_select_w7 |
2099 | movdqa .LOne(%rip),%xmm8 |
2100 | movd %edx,%xmm1 |
2101 | |
2102 | pxor %xmm2,%xmm2 |
2103 | pxor %xmm3,%xmm3 |
2104 | pxor %xmm4,%xmm4 |
2105 | pxor %xmm5,%xmm5 |
2106 | |
2107 | movdqa %xmm8,%xmm0 |
2108 | pshufd $0,%xmm1,%xmm1 |
2109 | movq $64,%rax |
2110 | |
2111 | .Lselect_loop_sse_w7: |
2112 | movdqa %xmm8,%xmm15 |
2113 | paddd %xmm0,%xmm8 |
2114 | movdqa 0(%rsi),%xmm9 |
2115 | movdqa 16(%rsi),%xmm10 |
2116 | pcmpeqd %xmm1,%xmm15 |
2117 | movdqa 32(%rsi),%xmm11 |
2118 | movdqa 48(%rsi),%xmm12 |
2119 | leaq 64(%rsi),%rsi |
2120 | |
2121 | pand %xmm15,%xmm9 |
2122 | pand %xmm15,%xmm10 |
2123 | por %xmm9,%xmm2 |
2124 | pand %xmm15,%xmm11 |
2125 | por %xmm10,%xmm3 |
2126 | pand %xmm15,%xmm12 |
2127 | por %xmm11,%xmm4 |
2128 | prefetcht0 255(%rsi) |
2129 | por %xmm12,%xmm5 |
2130 | |
2131 | decq %rax |
2132 | jnz .Lselect_loop_sse_w7 |
2133 | |
2134 | movdqu %xmm2,0(%rdi) |
2135 | movdqu %xmm3,16(%rdi) |
2136 | movdqu %xmm4,32(%rdi) |
2137 | movdqu %xmm5,48(%rdi) |
2138 | .byte 0xf3,0xc3 |
2139 | .cfi_endproc |
2140 | .LSEH_end_ecp_nistz256_select_w7: |
2141 | .size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7 |
2142 | |
2143 | |
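# AVX2 version of select_w5: two table entries per iteration, compared
# against the broadcast index with vpcmpeqd.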
2144 | .type ecp_nistz256_avx2_select_w5,@function |
2145 | .align 32 |
2146 | ecp_nistz256_avx2_select_w5: |
2147 | .cfi_startproc |
2148 | .Lavx2_select_w5: |
2149 | vzeroupper |
2150 | vmovdqa .LTwo(%rip),%ymm0 |
2151 | |
2152 | vpxor %ymm2,%ymm2,%ymm2 |
2153 | vpxor %ymm3,%ymm3,%ymm3 |
2154 | vpxor %ymm4,%ymm4,%ymm4 |
2155 | |
2156 | vmovdqa .LOne(%rip),%ymm5 |
2157 | vmovdqa .LTwo(%rip),%ymm10 |
2158 | |
2159 | vmovd %edx,%xmm1 |
2160 | vpermd %ymm1,%ymm2,%ymm1 |
2161 | |
2162 | movq $8,%rax |
2163 | .Lselect_loop_avx2_w5: |
2164 | |
2165 | vmovdqa 0(%rsi),%ymm6 |
2166 | vmovdqa 32(%rsi),%ymm7 |
2167 | vmovdqa 64(%rsi),%ymm8 |
2168 | |
2169 | vmovdqa 96(%rsi),%ymm11 |
2170 | vmovdqa 128(%rsi),%ymm12 |
2171 | vmovdqa 160(%rsi),%ymm13 |
2172 | |
2173 | vpcmpeqd %ymm1,%ymm5,%ymm9 |
2174 | vpcmpeqd %ymm1,%ymm10,%ymm14 |
2175 | |
2176 | vpaddd %ymm0,%ymm5,%ymm5 |
2177 | vpaddd %ymm0,%ymm10,%ymm10 |
2178 | leaq 192(%rsi),%rsi |
2179 | |
2180 | vpand %ymm9,%ymm6,%ymm6 |
2181 | vpand %ymm9,%ymm7,%ymm7 |
2182 | vpand %ymm9,%ymm8,%ymm8 |
2183 | vpand %ymm14,%ymm11,%ymm11 |
2184 | vpand %ymm14,%ymm12,%ymm12 |
2185 | vpand %ymm14,%ymm13,%ymm13 |
2186 | |
2187 | vpxor %ymm6,%ymm2,%ymm2 |
2188 | vpxor %ymm7,%ymm3,%ymm3 |
2189 | vpxor %ymm8,%ymm4,%ymm4 |
2190 | vpxor %ymm11,%ymm2,%ymm2 |
2191 | vpxor %ymm12,%ymm3,%ymm3 |
2192 | vpxor %ymm13,%ymm4,%ymm4 |
2193 | |
2194 | decq %rax |
2195 | jnz .Lselect_loop_avx2_w5 |
2196 | |
2197 | vmovdqu %ymm2,0(%rdi) |
2198 | vmovdqu %ymm3,32(%rdi) |
2199 | vmovdqu %ymm4,64(%rdi) |
2200 | vzeroupper |
2201 | .byte 0xf3,0xc3 |
2202 | .cfi_endproc |
2203 | .LSEH_end_ecp_nistz256_avx2_select_w5: |
2204 | .size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5 |
2205 | |
2206 | |
2207 | |
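# AVX2 version of select_w7: three entries per iteration for 21
# iterations, plus a 64th entry handled after the loop.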
2208 | .globl ecp_nistz256_avx2_select_w7 |
2209 | .hidden ecp_nistz256_avx2_select_w7 |
2210 | .type ecp_nistz256_avx2_select_w7,@function |
2211 | .align 32 |
2212 | ecp_nistz256_avx2_select_w7: |
2213 | .cfi_startproc |
2214 | .Lavx2_select_w7: |
2215 | vzeroupper |
2216 | vmovdqa .LThree(%rip),%ymm0 |
2217 | |
2218 | vpxor %ymm2,%ymm2,%ymm2 |
2219 | vpxor %ymm3,%ymm3,%ymm3 |
2220 | |
2221 | vmovdqa .LOne(%rip),%ymm4 |
2222 | vmovdqa .LTwo(%rip),%ymm8 |
2223 | vmovdqa .LThree(%rip),%ymm12 |
2224 | |
2225 | vmovd %edx,%xmm1 |
2226 | vpermd %ymm1,%ymm2,%ymm1 |
2227 | |
2228 | |
2229 | movq $21,%rax |
2230 | .Lselect_loop_avx2_w7: |
2231 | |
2232 | vmovdqa 0(%rsi),%ymm5 |
2233 | vmovdqa 32(%rsi),%ymm6 |
2234 | |
2235 | vmovdqa 64(%rsi),%ymm9 |
2236 | vmovdqa 96(%rsi),%ymm10 |
2237 | |
2238 | vmovdqa 128(%rsi),%ymm13 |
2239 | vmovdqa 160(%rsi),%ymm14 |
2240 | |
2241 | vpcmpeqd %ymm1,%ymm4,%ymm7 |
2242 | vpcmpeqd %ymm1,%ymm8,%ymm11 |
2243 | vpcmpeqd %ymm1,%ymm12,%ymm15 |
2244 | |
2245 | vpaddd %ymm0,%ymm4,%ymm4 |
2246 | vpaddd %ymm0,%ymm8,%ymm8 |
2247 | vpaddd %ymm0,%ymm12,%ymm12 |
2248 | leaq 192(%rsi),%rsi |
2249 | |
2250 | vpand %ymm7,%ymm5,%ymm5 |
2251 | vpand %ymm7,%ymm6,%ymm6 |
2252 | vpand %ymm11,%ymm9,%ymm9 |
2253 | vpand %ymm11,%ymm10,%ymm10 |
2254 | vpand %ymm15,%ymm13,%ymm13 |
2255 | vpand %ymm15,%ymm14,%ymm14 |
2256 | |
2257 | vpxor %ymm5,%ymm2,%ymm2 |
2258 | vpxor %ymm6,%ymm3,%ymm3 |
2259 | vpxor %ymm9,%ymm2,%ymm2 |
2260 | vpxor %ymm10,%ymm3,%ymm3 |
2261 | vpxor %ymm13,%ymm2,%ymm2 |
2262 | vpxor %ymm14,%ymm3,%ymm3 |
2263 | |
2264 | decq %rax |
2265 | jnz .Lselect_loop_avx2_w7 |
2266 | |
2267 | |
2268 | vmovdqa 0(%rsi),%ymm5 |
2269 | vmovdqa 32(%rsi),%ymm6 |
2270 | |
2271 | vpcmpeqd %ymm1,%ymm4,%ymm7 |
2272 | |
2273 | vpand %ymm7,%ymm5,%ymm5 |
2274 | vpand %ymm7,%ymm6,%ymm6 |
2275 | |
2276 | vpxor %ymm5,%ymm2,%ymm2 |
2277 | vpxor %ymm6,%ymm3,%ymm3 |
2278 | |
2279 | vmovdqu %ymm2,0(%rdi) |
2280 | vmovdqu %ymm3,32(%rdi) |
2281 | vzeroupper |
2282 | .byte 0xf3,0xc3 |
2283 | .cfi_endproc |
2284 | .LSEH_end_ecp_nistz256_avx2_select_w7: |
2285 | .size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 |
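# __ecp_nistz256_add_toq: r = a + b mod p. a in %r12,%r13,%r8,%r9,
# b at (%rbx); result stored to (%rdi) and left in the same registers.
# Like the helpers below, assumes %r14 = .Lpoly[1] and %r15 = .Lpoly[3].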
2286 | .type __ecp_nistz256_add_toq,@function |
2287 | .align 32 |
2288 | __ecp_nistz256_add_toq: |
2289 | .cfi_startproc |
2290 | xorq %r11,%r11 |
2291 | addq 0(%rbx),%r12 |
2292 | adcq 8(%rbx),%r13 |
2293 | movq %r12,%rax |
2294 | adcq 16(%rbx),%r8 |
2295 | adcq 24(%rbx),%r9 |
2296 | movq %r13,%rbp |
2297 | adcq $0,%r11 |
2298 | |
2299 | subq $-1,%r12 |
2300 | movq %r8,%rcx |
2301 | sbbq %r14,%r13 |
2302 | sbbq $0,%r8 |
2303 | movq %r9,%r10 |
2304 | sbbq %r15,%r9 |
2305 | sbbq $0,%r11 |
2306 | |
2307 | cmovcq %rax,%r12 |
2308 | cmovcq %rbp,%r13 |
2309 | movq %r12,0(%rdi) |
2310 | cmovcq %rcx,%r8 |
2311 | movq %r13,8(%rdi) |
2312 | cmovcq %r10,%r9 |
2313 | movq %r8,16(%rdi) |
2314 | movq %r9,24(%rdi) |
2315 | |
2316 | .byte 0xf3,0xc3 |
2317 | .cfi_endproc |
2318 | .size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq |
2319 | |
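# __ecp_nistz256_sub_fromq: r = a - b mod p; p is added back when the
# subtraction borrows.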
2320 | .type __ecp_nistz256_sub_fromq,@function |
2321 | .align 32 |
2322 | __ecp_nistz256_sub_fromq: |
2323 | .cfi_startproc |
2324 | subq 0(%rbx),%r12 |
2325 | sbbq 8(%rbx),%r13 |
2326 | movq %r12,%rax |
2327 | sbbq 16(%rbx),%r8 |
2328 | sbbq 24(%rbx),%r9 |
2329 | movq %r13,%rbp |
2330 | sbbq %r11,%r11 |
2331 | |
2332 | addq $-1,%r12 |
2333 | movq %r8,%rcx |
2334 | adcq %r14,%r13 |
2335 | adcq $0,%r8 |
2336 | movq %r9,%r10 |
2337 | adcq %r15,%r9 |
2338 | testq %r11,%r11 |
2339 | |
2340 | cmovzq %rax,%r12 |
2341 | cmovzq %rbp,%r13 |
2342 | movq %r12,0(%rdi) |
2343 | cmovzq %rcx,%r8 |
2344 | movq %r13,8(%rdi) |
2345 | cmovzq %r10,%r9 |
2346 | movq %r8,16(%rdi) |
2347 | movq %r9,24(%rdi) |
2348 | |
2349 | .byte 0xf3,0xc3 |
2350 | .cfi_endproc |
2351 | .size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq |
2352 | |
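# __ecp_nistz256_subq: in-register subtraction mod p:
# (%rax,%rbp,%rcx,%r10) - (%r12,%r13,%r8,%r9), result left in
# %r12,%r13,%r8,%r9; nothing is stored.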
2353 | .type __ecp_nistz256_subq,@function |
2354 | .align 32 |
2355 | __ecp_nistz256_subq: |
2356 | .cfi_startproc |
2357 | subq %r12,%rax |
2358 | sbbq %r13,%rbp |
2359 | movq %rax,%r12 |
2360 | sbbq %r8,%rcx |
2361 | sbbq %r9,%r10 |
2362 | movq %rbp,%r13 |
2363 | sbbq %r11,%r11 |
2364 | |
2365 | addq $-1,%rax |
2366 | movq %rcx,%r8 |
2367 | adcq %r14,%rbp |
2368 | adcq $0,%rcx |
2369 | movq %r10,%r9 |
2370 | adcq %r15,%r10 |
2371 | testq %r11,%r11 |
2372 | |
2373 | cmovnzq %rax,%r12 |
2374 | cmovnzq %rbp,%r13 |
2375 | cmovnzq %rcx,%r8 |
2376 | cmovnzq %r10,%r9 |
2377 | |
2378 | .byte 0xf3,0xc3 |
2379 | .cfi_endproc |
2380 | .size __ecp_nistz256_subq,.-__ecp_nistz256_subq |
2381 | |
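# __ecp_nistz256_mul_by_2q: r = 2*a mod p; same register contract as
# __ecp_nistz256_add_toq.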
2382 | .type __ecp_nistz256_mul_by_2q,@function |
2383 | .align 32 |
2384 | __ecp_nistz256_mul_by_2q: |
2385 | .cfi_startproc |
2386 | xorq %r11,%r11 |
2387 | addq %r12,%r12 |
2388 | adcq %r13,%r13 |
2389 | movq %r12,%rax |
2390 | adcq %r8,%r8 |
2391 | adcq %r9,%r9 |
2392 | movq %r13,%rbp |
2393 | adcq $0,%r11 |
2394 | |
2395 | subq $-1,%r12 |
2396 | movq %r8,%rcx |
2397 | sbbq %r14,%r13 |
2398 | sbbq $0,%r8 |
2399 | movq %r9,%r10 |
2400 | sbbq %r15,%r9 |
2401 | sbbq $0,%r11 |
2402 | |
2403 | cmovcq %rax,%r12 |
2404 | cmovcq %rbp,%r13 |
2405 | movq %r12,0(%rdi) |
2406 | cmovcq %rcx,%r8 |
2407 | movq %r13,8(%rdi) |
2408 | cmovcq %r10,%r9 |
2409 | movq %r8,16(%rdi) |
2410 | movq %r9,24(%rdi) |
2411 | |
2412 | .byte 0xf3,0xc3 |
2413 | .cfi_endproc |
2414 | .size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q |
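# ecp_nistz256_point_double: Jacobian point doubling, %rdi = r, %rsi = a.
# Reserves 160+8 stack bytes for five 32-byte temporaries (plus alignment)
# and dispatches to the BMI2/ADX path (.Lpoint_doublex) when available.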
2415 | .globl ecp_nistz256_point_double |
2416 | .hidden ecp_nistz256_point_double |
2417 | .type ecp_nistz256_point_double,@function |
2418 | .align 32 |
2419 | ecp_nistz256_point_double: |
2420 | .cfi_startproc |
2421 | leaq OPENSSL_ia32cap_P(%rip),%rcx |
2422 | movq 8(%rcx),%rcx |
2423 | andl $0x80100,%ecx |
2424 | cmpl $0x80100,%ecx |
2425 | je .Lpoint_doublex |
2426 | pushq %rbp |
2427 | .cfi_adjust_cfa_offset 8 |
2428 | .cfi_offset %rbp,-16 |
2429 | pushq %rbx |
2430 | .cfi_adjust_cfa_offset 8 |
2431 | .cfi_offset %rbx,-24 |
2432 | pushq %r12 |
2433 | .cfi_adjust_cfa_offset 8 |
2434 | .cfi_offset %r12,-32 |
2435 | pushq %r13 |
2436 | .cfi_adjust_cfa_offset 8 |
2437 | .cfi_offset %r13,-40 |
2438 | pushq %r14 |
2439 | .cfi_adjust_cfa_offset 8 |
2440 | .cfi_offset %r14,-48 |
2441 | pushq %r15 |
2442 | .cfi_adjust_cfa_offset 8 |
2443 | .cfi_offset %r15,-56 |
2444 | subq $160+8,%rsp |
2445 | .cfi_adjust_cfa_offset 32*5+8 |
2446 | .Lpoint_doubleq_body: |
2447 | |
2448 | .Lpoint_double_shortcutq: |
2449 | movdqu 0(%rsi),%xmm0 |
2450 | movq %rsi,%rbx |
2451 | movdqu 16(%rsi),%xmm1 |
2452 | movq 32+0(%rsi),%r12 |
2453 | movq 32+8(%rsi),%r13 |
2454 | movq 32+16(%rsi),%r8 |
2455 | movq 32+24(%rsi),%r9 |
2456 | movq .Lpoly+8(%rip),%r14 |
2457 | movq .Lpoly+24(%rip),%r15 |
2458 | movdqa %xmm0,96(%rsp) |
2459 | movdqa %xmm1,96+16(%rsp) |
2460 | leaq 32(%rdi),%r10 |
2461 | leaq 64(%rdi),%r11 |
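// The .byte runs below encode movq between general-purpose and XMM
// registers (102,72,15,110,199 is movq %rdi,%xmm0); perlasm emits the
// raw bytes for compatibility with older assemblers.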
2462 | .byte 102,72,15,110,199 |
2463 | .byte 102,73,15,110,202 |
2464 | .byte 102,73,15,110,211 |
2465 | |
2466 | leaq 0(%rsp),%rdi |
2467 | call __ecp_nistz256_mul_by_2q |
2468 | |
2469 | movq 64+0(%rsi),%rax |
2470 | movq 64+8(%rsi),%r14 |
2471 | movq 64+16(%rsi),%r15 |
2472 | movq 64+24(%rsi),%r8 |
2473 | leaq 64-0(%rsi),%rsi |
2474 | leaq 64(%rsp),%rdi |
2475 | call __ecp_nistz256_sqr_montq |
2476 | |
2477 | movq 0+0(%rsp),%rax |
2478 | movq 8+0(%rsp),%r14 |
2479 | leaq 0+0(%rsp),%rsi |
2480 | movq 16+0(%rsp),%r15 |
2481 | movq 24+0(%rsp),%r8 |
2482 | leaq 0(%rsp),%rdi |
2483 | call __ecp_nistz256_sqr_montq |
2484 | |
2485 | movq 32(%rbx),%rax |
2486 | movq 64+0(%rbx),%r9 |
2487 | movq 64+8(%rbx),%r10 |
2488 | movq 64+16(%rbx),%r11 |
2489 | movq 64+24(%rbx),%r12 |
2490 | leaq 64-0(%rbx),%rsi |
2491 | leaq 32(%rbx),%rbx |
2492 | .byte 102,72,15,126,215 |
2493 | call __ecp_nistz256_mul_montq |
2494 | call __ecp_nistz256_mul_by_2q |
2495 | |
2496 | movq 96+0(%rsp),%r12 |
2497 | movq 96+8(%rsp),%r13 |
2498 | leaq 64(%rsp),%rbx |
2499 | movq 96+16(%rsp),%r8 |
2500 | movq 96+24(%rsp),%r9 |
2501 | leaq 32(%rsp),%rdi |
2502 | call __ecp_nistz256_add_toq |
2503 | |
2504 | movq 96+0(%rsp),%r12 |
2505 | movq 96+8(%rsp),%r13 |
2506 | leaq 64(%rsp),%rbx |
2507 | movq 96+16(%rsp),%r8 |
2508 | movq 96+24(%rsp),%r9 |
2509 | leaq 64(%rsp),%rdi |
2510 | call __ecp_nistz256_sub_fromq |
2511 | |
2512 | movq 0+0(%rsp),%rax |
2513 | movq 8+0(%rsp),%r14 |
2514 | leaq 0+0(%rsp),%rsi |
2515 | movq 16+0(%rsp),%r15 |
2516 | movq 24+0(%rsp),%r8 |
2517 | .byte 102,72,15,126,207 |
2518 | call __ecp_nistz256_sqr_montq |
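// Divide the squared value by 2 mod p: add p when the low bit is set,
// then shift the five-limb sum (carry included) right by one bit.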
2519 | xorq %r9,%r9 |
2520 | movq %r12,%rax |
2521 | addq $-1,%r12 |
2522 | movq %r13,%r10 |
2523 | adcq %rsi,%r13 |
2524 | movq %r14,%rcx |
2525 | adcq $0,%r14 |
2526 | movq %r15,%r8 |
2527 | adcq %rbp,%r15 |
2528 | adcq $0,%r9 |
2529 | xorq %rsi,%rsi |
2530 | testq $1,%rax |
2531 | |
2532 | cmovzq %rax,%r12 |
2533 | cmovzq %r10,%r13 |
2534 | cmovzq %rcx,%r14 |
2535 | cmovzq %r8,%r15 |
2536 | cmovzq %rsi,%r9 |
2537 | |
2538 | movq %r13,%rax |
2539 | shrq $1,%r12 |
2540 | shlq $63,%rax |
2541 | movq %r14,%r10 |
2542 | shrq $1,%r13 |
2543 | orq %rax,%r12 |
2544 | shlq $63,%r10 |
2545 | movq %r15,%rcx |
2546 | shrq $1,%r14 |
2547 | orq %r10,%r13 |
2548 | shlq $63,%rcx |
2549 | movq %r12,0(%rdi) |
2550 | shrq $1,%r15 |
2551 | movq %r13,8(%rdi) |
2552 | shlq $63,%r9 |
2553 | orq %rcx,%r14 |
2554 | orq %r9,%r15 |
2555 | movq %r14,16(%rdi) |
2556 | movq %r15,24(%rdi) |
2557 | movq 64(%rsp),%rax |
2558 | leaq 64(%rsp),%rbx |
2559 | movq 0+32(%rsp),%r9 |
2560 | movq 8+32(%rsp),%r10 |
2561 | leaq 0+32(%rsp),%rsi |
2562 | movq 16+32(%rsp),%r11 |
2563 | movq 24+32(%rsp),%r12 |
2564 | leaq 32(%rsp),%rdi |
2565 | call __ecp_nistz256_mul_montq |
2566 | |
2567 | leaq 128(%rsp),%rdi |
2568 | call __ecp_nistz256_mul_by_2q |
2569 | |
2570 | leaq 32(%rsp),%rbx |
2571 | leaq 32(%rsp),%rdi |
2572 | call __ecp_nistz256_add_toq |
2573 | |
2574 | movq 96(%rsp),%rax |
2575 | leaq 96(%rsp),%rbx |
2576 | movq 0+0(%rsp),%r9 |
2577 | movq 8+0(%rsp),%r10 |
2578 | leaq 0+0(%rsp),%rsi |
2579 | movq 16+0(%rsp),%r11 |
2580 | movq 24+0(%rsp),%r12 |
2581 | leaq 0(%rsp),%rdi |
2582 | call __ecp_nistz256_mul_montq |
2583 | |
2584 | leaq 128(%rsp),%rdi |
2585 | call __ecp_nistz256_mul_by_2q |
2586 | |
2587 | movq 0+32(%rsp),%rax |
2588 | movq 8+32(%rsp),%r14 |
2589 | leaq 0+32(%rsp),%rsi |
2590 | movq 16+32(%rsp),%r15 |
2591 | movq 24+32(%rsp),%r8 |
2592 | .byte 102,72,15,126,199 |
2593 | call __ecp_nistz256_sqr_montq |
2594 | |
2595 | leaq 128(%rsp),%rbx |
2596 | movq %r14,%r8 |
2597 | movq %r15,%r9 |
2598 | movq %rsi,%r14 |
2599 | movq %rbp,%r15 |
2600 | call __ecp_nistz256_sub_fromq |
2601 | |
2602 | movq 0+0(%rsp),%rax |
2603 | movq 0+8(%rsp),%rbp |
2604 | movq 0+16(%rsp),%rcx |
2605 | movq 0+24(%rsp),%r10 |
2606 | leaq 0(%rsp),%rdi |
2607 | call __ecp_nistz256_subq |
2608 | |
2609 | movq 32(%rsp),%rax |
2610 | leaq 32(%rsp),%rbx |
2611 | movq %r12,%r14 |
2612 | xorl %ecx,%ecx |
2613 | movq %r12,0+0(%rsp) |
2614 | movq %r13,%r10 |
2615 | movq %r13,0+8(%rsp) |
2616 | cmovzq %r8,%r11 |
2617 | movq %r8,0+16(%rsp) |
2618 | leaq 0-0(%rsp),%rsi |
2619 | cmovzq %r9,%r12 |
2620 | movq %r9,0+24(%rsp) |
2621 | movq %r14,%r9 |
2622 | leaq 0(%rsp),%rdi |
2623 | call __ecp_nistz256_mul_montq |
2624 | |
2625 | .byte 102,72,15,126,203 |
2626 | .byte 102,72,15,126,207 |
2627 | call __ecp_nistz256_sub_fromq |
2628 | |
2629 | leaq 160+56(%rsp),%rsi |
2630 | .cfi_def_cfa %rsi,8 |
2631 | movq -48(%rsi),%r15 |
2632 | .cfi_restore %r15 |
2633 | movq -40(%rsi),%r14 |
2634 | .cfi_restore %r14 |
2635 | movq -32(%rsi),%r13 |
2636 | .cfi_restore %r13 |
2637 | movq -24(%rsi),%r12 |
2638 | .cfi_restore %r12 |
2639 | movq -16(%rsi),%rbx |
2640 | .cfi_restore %rbx |
2641 | movq -8(%rsi),%rbp |
2642 | .cfi_restore %rbp |
2643 | leaq (%rsi),%rsp |
2644 | .cfi_def_cfa_register %rsp |
2645 | .Lpoint_doubleq_epilogue: |
2646 | .byte 0xf3,0xc3 |
2647 | .cfi_endproc |
2648 | .size ecp_nistz256_point_double,.-ecp_nistz256_point_double |
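// ecp_nistz256_point_add(P256_POINT *r, const P256_POINT *a,
// const P256_POINT *b): full Jacobian addition, with special handling
// of a == +/-b and of inputs at infinity.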
2649 | .globl ecp_nistz256_point_add |
2650 | .hidden ecp_nistz256_point_add |
2651 | .type ecp_nistz256_point_add,@function |
2652 | .align 32 |
2653 | ecp_nistz256_point_add: |
2654 | .cfi_startproc |
2655 | leaq OPENSSL_ia32cap_P(%rip),%rcx |
2656 | movq 8(%rcx),%rcx |
2657 | andl $0x80100,%ecx |
2658 | cmpl $0x80100,%ecx |
2659 | je .Lpoint_addx |
2660 | pushq %rbp |
2661 | .cfi_adjust_cfa_offset 8 |
2662 | .cfi_offset %rbp,-16 |
2663 | pushq %rbx |
2664 | .cfi_adjust_cfa_offset 8 |
2665 | .cfi_offset %rbx,-24 |
2666 | pushq %r12 |
2667 | .cfi_adjust_cfa_offset 8 |
2668 | .cfi_offset %r12,-32 |
2669 | pushq %r13 |
2670 | .cfi_adjust_cfa_offset 8 |
2671 | .cfi_offset %r13,-40 |
2672 | pushq %r14 |
2673 | .cfi_adjust_cfa_offset 8 |
2674 | .cfi_offset %r14,-48 |
2675 | pushq %r15 |
2676 | .cfi_adjust_cfa_offset 8 |
2677 | .cfi_offset %r15,-56 |
2678 | subq $576+8,%rsp |
2679 | .cfi_adjust_cfa_offset 32*18+8 |
2680 | .Lpoint_addq_body: |
2681 | |
2682 | movdqu 0(%rsi),%xmm0 |
2683 | movdqu 16(%rsi),%xmm1 |
2684 | movdqu 32(%rsi),%xmm2 |
2685 | movdqu 48(%rsi),%xmm3 |
2686 | movdqu 64(%rsi),%xmm4 |
2687 | movdqu 80(%rsi),%xmm5 |
2688 | movq %rsi,%rbx |
2689 | movq %rdx,%rsi |
2690 | movdqa %xmm0,384(%rsp) |
2691 | movdqa %xmm1,384+16(%rsp) |
2692 | movdqa %xmm2,416(%rsp) |
2693 | movdqa %xmm3,416+16(%rsp) |
2694 | movdqa %xmm4,448(%rsp) |
2695 | movdqa %xmm5,448+16(%rsp) |
2696 | por %xmm4,%xmm5 |
2697 | |
2698 | movdqu 0(%rsi),%xmm0 |
2699 | pshufd $0xb1,%xmm5,%xmm3 |
2700 | movdqu 16(%rsi),%xmm1 |
2701 | movdqu 32(%rsi),%xmm2 |
2702 | por %xmm3,%xmm5 |
2703 | movdqu 48(%rsi),%xmm3 |
2704 | movq 64+0(%rsi),%rax |
2705 | movq 64+8(%rsi),%r14 |
2706 | movq 64+16(%rsi),%r15 |
2707 | movq 64+24(%rsi),%r8 |
2708 | movdqa %xmm0,480(%rsp) |
2709 | pshufd $0x1e,%xmm5,%xmm4 |
2710 | movdqa %xmm1,480+16(%rsp) |
2711 | movdqu 64(%rsi),%xmm0 |
2712 | movdqu 80(%rsi),%xmm1 |
2713 | movdqa %xmm2,512(%rsp) |
2714 | movdqa %xmm3,512+16(%rsp) |
2715 | por %xmm4,%xmm5 |
2716 | pxor %xmm4,%xmm4 |
2717 | por %xmm0,%xmm1 |
2718 | .byte 102,72,15,110,199 |
2719 | |
2720 | leaq 64-0(%rsi),%rsi |
2721 | movq %rax,544+0(%rsp) |
2722 | movq %r14,544+8(%rsp) |
2723 | movq %r15,544+16(%rsp) |
2724 | movq %r8,544+24(%rsp) |
2725 | leaq 96(%rsp),%rdi |
2726 | call __ecp_nistz256_sqr_montq |
2727 | |
2728 | pcmpeqd %xmm4,%xmm5 |
2729 | pshufd $0xb1,%xmm1,%xmm4 |
2730 | por %xmm1,%xmm4 |
2731 | pshufd $0,%xmm5,%xmm5 |
2732 | pshufd $0x1e,%xmm4,%xmm3 |
2733 | por %xmm3,%xmm4 |
2734 | pxor %xmm3,%xmm3 |
2735 | pcmpeqd %xmm3,%xmm4 |
2736 | pshufd $0,%xmm4,%xmm4 |
2737 | movq 64+0(%rbx),%rax |
2738 | movq 64+8(%rbx),%r14 |
2739 | movq 64+16(%rbx),%r15 |
2740 | movq 64+24(%rbx),%r8 |
2741 | .byte 102,72,15,110,203 |
2742 | |
2743 | leaq 64-0(%rbx),%rsi |
2744 | leaq 32(%rsp),%rdi |
2745 | call __ecp_nistz256_sqr_montq |
2746 | |
2747 | movq 544(%rsp),%rax |
2748 | leaq 544(%rsp),%rbx |
2749 | movq 0+96(%rsp),%r9 |
2750 | movq 8+96(%rsp),%r10 |
2751 | leaq 0+96(%rsp),%rsi |
2752 | movq 16+96(%rsp),%r11 |
2753 | movq 24+96(%rsp),%r12 |
2754 | leaq 224(%rsp),%rdi |
2755 | call __ecp_nistz256_mul_montq |
2756 | |
2757 | movq 448(%rsp),%rax |
2758 | leaq 448(%rsp),%rbx |
2759 | movq 0+32(%rsp),%r9 |
2760 | movq 8+32(%rsp),%r10 |
2761 | leaq 0+32(%rsp),%rsi |
2762 | movq 16+32(%rsp),%r11 |
2763 | movq 24+32(%rsp),%r12 |
2764 | leaq 256(%rsp),%rdi |
2765 | call __ecp_nistz256_mul_montq |
2766 | |
2767 | movq 416(%rsp),%rax |
2768 | leaq 416(%rsp),%rbx |
2769 | movq 0+224(%rsp),%r9 |
2770 | movq 8+224(%rsp),%r10 |
2771 | leaq 0+224(%rsp),%rsi |
2772 | movq 16+224(%rsp),%r11 |
2773 | movq 24+224(%rsp),%r12 |
2774 | leaq 224(%rsp),%rdi |
2775 | call __ecp_nistz256_mul_montq |
2776 | |
2777 | movq 512(%rsp),%rax |
2778 | leaq 512(%rsp),%rbx |
2779 | movq 0+256(%rsp),%r9 |
2780 | movq 8+256(%rsp),%r10 |
2781 | leaq 0+256(%rsp),%rsi |
2782 | movq 16+256(%rsp),%r11 |
2783 | movq 24+256(%rsp),%r12 |
2784 | leaq 256(%rsp),%rdi |
2785 | call __ecp_nistz256_mul_montq |
2786 | |
2787 | leaq 224(%rsp),%rbx |
2788 | leaq 64(%rsp),%rdi |
2789 | call __ecp_nistz256_sub_fromq |
2790 | |
2791 | orq %r13,%r12 |
2792 | movdqa %xmm4,%xmm2 |
2793 | orq %r8,%r12 |
2794 | orq %r9,%r12 |
2795 | por %xmm5,%xmm2 |
2796 | .byte 102,73,15,110,220 |
2797 | |
2798 | movq 384(%rsp),%rax |
2799 | leaq 384(%rsp),%rbx |
2800 | movq 0+96(%rsp),%r9 |
2801 | movq 8+96(%rsp),%r10 |
2802 | leaq 0+96(%rsp),%rsi |
2803 | movq 16+96(%rsp),%r11 |
2804 | movq 24+96(%rsp),%r12 |
2805 | leaq 160(%rsp),%rdi |
2806 | call __ecp_nistz256_mul_montq |
2807 | |
2808 | movq 480(%rsp),%rax |
2809 | leaq 480(%rsp),%rbx |
2810 | movq 0+32(%rsp),%r9 |
2811 | movq 8+32(%rsp),%r10 |
2812 | leaq 0+32(%rsp),%rsi |
2813 | movq 16+32(%rsp),%r11 |
2814 | movq 24+32(%rsp),%r12 |
2815 | leaq 192(%rsp),%rdi |
2816 | call __ecp_nistz256_mul_montq |
2817 | |
2818 | leaq 160(%rsp),%rbx |
2819 | leaq 0(%rsp),%rdi |
2820 | call __ecp_nistz256_sub_fromq |
2821 | |
2822 | orq %r13,%r12 |
2823 | orq %r8,%r12 |
2824 | orq %r9,%r12 |
2825 | |
2826 | .byte 102,73,15,126,208 |
2827 | .byte 102,73,15,126,217 |
2828 | orq %r8,%r12 |
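// 0x3e is a legacy "predict taken" branch-hint prefix for the jnz below.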
2829 | .byte 0x3e |
2830 | jnz .Ladd_proceedq |
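// H == 0 and neither input is at infinity, so a == +/-b. If R = S2 - S1
// is also zero the points are equal and we double; otherwise a == -b and
// the sum is the point at infinity, written out as all zeros below.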
2831 | |
2832 | |
2833 | |
2834 | testq %r9,%r9 |
2835 | jz .Ladd_doubleq |
2836 | |
2842 | .byte 102,72,15,126,199 |
2843 | pxor %xmm0,%xmm0 |
2844 | movdqu %xmm0,0(%rdi) |
2845 | movdqu %xmm0,16(%rdi) |
2846 | movdqu %xmm0,32(%rdi) |
2847 | movdqu %xmm0,48(%rdi) |
2848 | movdqu %xmm0,64(%rdi) |
2849 | movdqu %xmm0,80(%rdi) |
2850 | jmp .Ladd_doneq |
2851 | |
2852 | .align 32 |
2853 | .Ladd_doubleq: |
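// Equal inputs: release 416 bytes so the 576+8-byte add frame becomes
// point_double's 160+8-byte frame, then jump into the doubling body.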
2854 | .byte 102,72,15,126,206 |
2855 | .byte 102,72,15,126,199 |
2856 | addq $416,%rsp |
2857 | .cfi_adjust_cfa_offset -416 |
2858 | jmp .Lpoint_double_shortcutq |
2859 | .cfi_adjust_cfa_offset 416 |
2860 | |
2861 | .align 32 |
2862 | .Ladd_proceedq: |
2863 | movq 0+64(%rsp),%rax |
2864 | movq 8+64(%rsp),%r14 |
2865 | leaq 0+64(%rsp),%rsi |
2866 | movq 16+64(%rsp),%r15 |
2867 | movq 24+64(%rsp),%r8 |
2868 | leaq 96(%rsp),%rdi |
2869 | call __ecp_nistz256_sqr_montq |
2870 | |
2871 | movq 448(%rsp),%rax |
2872 | leaq 448(%rsp),%rbx |
2873 | movq 0+0(%rsp),%r9 |
2874 | movq 8+0(%rsp),%r10 |
2875 | leaq 0+0(%rsp),%rsi |
2876 | movq 16+0(%rsp),%r11 |
2877 | movq 24+0(%rsp),%r12 |
2878 | leaq 352(%rsp),%rdi |
2879 | call __ecp_nistz256_mul_montq |
2880 | |
2881 | movq 0+0(%rsp),%rax |
2882 | movq 8+0(%rsp),%r14 |
2883 | leaq 0+0(%rsp),%rsi |
2884 | movq 16+0(%rsp),%r15 |
2885 | movq 24+0(%rsp),%r8 |
2886 | leaq 32(%rsp),%rdi |
2887 | call __ecp_nistz256_sqr_montq |
2888 | |
2889 | movq 544(%rsp),%rax |
2890 | leaq 544(%rsp),%rbx |
2891 | movq 0+352(%rsp),%r9 |
2892 | movq 8+352(%rsp),%r10 |
2893 | leaq 0+352(%rsp),%rsi |
2894 | movq 16+352(%rsp),%r11 |
2895 | movq 24+352(%rsp),%r12 |
2896 | leaq 352(%rsp),%rdi |
2897 | call __ecp_nistz256_mul_montq |
2898 | |
2899 | movq 0(%rsp),%rax |
2900 | leaq 0(%rsp),%rbx |
2901 | movq 0+32(%rsp),%r9 |
2902 | movq 8+32(%rsp),%r10 |
2903 | leaq 0+32(%rsp),%rsi |
2904 | movq 16+32(%rsp),%r11 |
2905 | movq 24+32(%rsp),%r12 |
2906 | leaq 128(%rsp),%rdi |
2907 | call __ecp_nistz256_mul_montq |
2908 | |
2909 | movq 160(%rsp),%rax |
2910 | leaq 160(%rsp),%rbx |
2911 | movq 0+32(%rsp),%r9 |
2912 | movq 8+32(%rsp),%r10 |
2913 | leaq 0+32(%rsp),%rsi |
2914 | movq 16+32(%rsp),%r11 |
2915 | movq 24+32(%rsp),%r12 |
2916 | leaq 192(%rsp),%rdi |
2917 | call __ecp_nistz256_mul_montq |
2918 | |
2919 | |
2920 | |
2921 | |
2922 | xorq %r11,%r11 |
2923 | addq %r12,%r12 |
2924 | leaq 96(%rsp),%rsi |
2925 | adcq %r13,%r13 |
2926 | movq %r12,%rax |
2927 | adcq %r8,%r8 |
2928 | adcq %r9,%r9 |
2929 | movq %r13,%rbp |
2930 | adcq $0,%r11 |
2931 | |
2932 | subq $-1,%r12 |
2933 | movq %r8,%rcx |
2934 | sbbq %r14,%r13 |
2935 | sbbq $0,%r8 |
2936 | movq %r9,%r10 |
2937 | sbbq %r15,%r9 |
2938 | sbbq $0,%r11 |
2939 | |
2940 | cmovcq %rax,%r12 |
2941 | movq 0(%rsi),%rax |
2942 | cmovcq %rbp,%r13 |
2943 | movq 8(%rsi),%rbp |
2944 | cmovcq %rcx,%r8 |
2945 | movq 16(%rsi),%rcx |
2946 | cmovcq %r10,%r9 |
2947 | movq 24(%rsi),%r10 |
2948 | |
2949 | call __ecp_nistz256_subq |
2950 | |
2951 | leaq 128(%rsp),%rbx |
2952 | leaq 288(%rsp),%rdi |
2953 | call __ecp_nistz256_sub_fromq |
2954 | |
2955 | movq 192+0(%rsp),%rax |
2956 | movq 192+8(%rsp),%rbp |
2957 | movq 192+16(%rsp),%rcx |
2958 | movq 192+24(%rsp),%r10 |
2959 | leaq 320(%rsp),%rdi |
2960 | |
2961 | call __ecp_nistz256_subq |
2962 | |
2963 | movq %r12,0(%rdi) |
2964 | movq %r13,8(%rdi) |
2965 | movq %r8,16(%rdi) |
2966 | movq %r9,24(%rdi) |
2967 | movq 128(%rsp),%rax |
2968 | leaq 128(%rsp),%rbx |
2969 | movq 0+224(%rsp),%r9 |
2970 | movq 8+224(%rsp),%r10 |
2971 | leaq 0+224(%rsp),%rsi |
2972 | movq 16+224(%rsp),%r11 |
2973 | movq 24+224(%rsp),%r12 |
2974 | leaq 256(%rsp),%rdi |
2975 | call __ecp_nistz256_mul_montq |
2976 | |
2977 | movq 320(%rsp),%rax |
2978 | leaq 320(%rsp),%rbx |
2979 | movq 0+64(%rsp),%r9 |
2980 | movq 8+64(%rsp),%r10 |
2981 | leaq 0+64(%rsp),%rsi |
2982 | movq 16+64(%rsp),%r11 |
2983 | movq 24+64(%rsp),%r12 |
2984 | leaq 320(%rsp),%rdi |
2985 | call __ecp_nistz256_mul_montq |
2986 | |
2987 | leaq 256(%rsp),%rbx |
2988 | leaq 320(%rsp),%rdi |
2989 | call __ecp_nistz256_sub_fromq |
2990 | |
2991 | .byte 102,72,15,126,199 |
2992 | |
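// Constant-time result selection: xmm5 is all-ones when a (%rsi) was the
// point at infinity and xmm4 when b (%rdx) was, so each coordinate picks
// b, a, or the computed sum without branching.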
2993 | movdqa %xmm5,%xmm0 |
2994 | movdqa %xmm5,%xmm1 |
2995 | pandn 352(%rsp),%xmm0 |
2996 | movdqa %xmm5,%xmm2 |
2997 | pandn 352+16(%rsp),%xmm1 |
2998 | movdqa %xmm5,%xmm3 |
2999 | pand 544(%rsp),%xmm2 |
3000 | pand 544+16(%rsp),%xmm3 |
3001 | por %xmm0,%xmm2 |
3002 | por %xmm1,%xmm3 |
3003 | |
3004 | movdqa %xmm4,%xmm0 |
3005 | movdqa %xmm4,%xmm1 |
3006 | pandn %xmm2,%xmm0 |
3007 | movdqa %xmm4,%xmm2 |
3008 | pandn %xmm3,%xmm1 |
3009 | movdqa %xmm4,%xmm3 |
3010 | pand 448(%rsp),%xmm2 |
3011 | pand 448+16(%rsp),%xmm3 |
3012 | por %xmm0,%xmm2 |
3013 | por %xmm1,%xmm3 |
3014 | movdqu %xmm2,64(%rdi) |
3015 | movdqu %xmm3,80(%rdi) |
3016 | |
3017 | movdqa %xmm5,%xmm0 |
3018 | movdqa %xmm5,%xmm1 |
3019 | pandn 288(%rsp),%xmm0 |
3020 | movdqa %xmm5,%xmm2 |
3021 | pandn 288+16(%rsp),%xmm1 |
3022 | movdqa %xmm5,%xmm3 |
3023 | pand 480(%rsp),%xmm2 |
3024 | pand 480+16(%rsp),%xmm3 |
3025 | por %xmm0,%xmm2 |
3026 | por %xmm1,%xmm3 |
3027 | |
3028 | movdqa %xmm4,%xmm0 |
3029 | movdqa %xmm4,%xmm1 |
3030 | pandn %xmm2,%xmm0 |
3031 | movdqa %xmm4,%xmm2 |
3032 | pandn %xmm3,%xmm1 |
3033 | movdqa %xmm4,%xmm3 |
3034 | pand 384(%rsp),%xmm2 |
3035 | pand 384+16(%rsp),%xmm3 |
3036 | por %xmm0,%xmm2 |
3037 | por %xmm1,%xmm3 |
3038 | movdqu %xmm2,0(%rdi) |
3039 | movdqu %xmm3,16(%rdi) |
3040 | |
3041 | movdqa %xmm5,%xmm0 |
3042 | movdqa %xmm5,%xmm1 |
3043 | pandn 320(%rsp),%xmm0 |
3044 | movdqa %xmm5,%xmm2 |
3045 | pandn 320+16(%rsp),%xmm1 |
3046 | movdqa %xmm5,%xmm3 |
3047 | pand 512(%rsp),%xmm2 |
3048 | pand 512+16(%rsp),%xmm3 |
3049 | por %xmm0,%xmm2 |
3050 | por %xmm1,%xmm3 |
3051 | |
3052 | movdqa %xmm4,%xmm0 |
3053 | movdqa %xmm4,%xmm1 |
3054 | pandn %xmm2,%xmm0 |
3055 | movdqa %xmm4,%xmm2 |
3056 | pandn %xmm3,%xmm1 |
3057 | movdqa %xmm4,%xmm3 |
3058 | pand 416(%rsp),%xmm2 |
3059 | pand 416+16(%rsp),%xmm3 |
3060 | por %xmm0,%xmm2 |
3061 | por %xmm1,%xmm3 |
3062 | movdqu %xmm2,32(%rdi) |
3063 | movdqu %xmm3,48(%rdi) |
3064 | |
3065 | .Ladd_doneq: |
3066 | leaq 576+56(%rsp),%rsi |
3067 | .cfi_def_cfa %rsi,8 |
3068 | movq -48(%rsi),%r15 |
3069 | .cfi_restore %r15 |
3070 | movq -40(%rsi),%r14 |
3071 | .cfi_restore %r14 |
3072 | movq -32(%rsi),%r13 |
3073 | .cfi_restore %r13 |
3074 | movq -24(%rsi),%r12 |
3075 | .cfi_restore %r12 |
3076 | movq -16(%rsi),%rbx |
3077 | .cfi_restore %rbx |
3078 | movq -8(%rsi),%rbp |
3079 | .cfi_restore %rbp |
3080 | leaq (%rsi),%rsp |
3081 | .cfi_def_cfa_register %rsp |
3082 | .Lpoint_addq_epilogue: |
3083 | .byte 0xf3,0xc3 |
3084 | .cfi_endproc |
3085 | .size ecp_nistz256_point_add,.-ecp_nistz256_point_add |
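// ecp_nistz256_point_add_affine(P256_POINT *r, const P256_POINT *a,
// const P256_POINT_AFFINE *b): mixed addition with b in affine form
// (Z2 == 1; .LONE_mont stands in for b's Z when selecting the result).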
3086 | .globl ecp_nistz256_point_add_affine |
3087 | .hidden ecp_nistz256_point_add_affine |
3088 | .type ecp_nistz256_point_add_affine,@function |
3089 | .align 32 |
3090 | ecp_nistz256_point_add_affine: |
3091 | .cfi_startproc |
3092 | leaq OPENSSL_ia32cap_P(%rip),%rcx |
3093 | movq 8(%rcx),%rcx |
3094 | andl $0x80100,%ecx |
3095 | cmpl $0x80100,%ecx |
3096 | je .Lpoint_add_affinex |
3097 | pushq %rbp |
3098 | .cfi_adjust_cfa_offset 8 |
3099 | .cfi_offset %rbp,-16 |
3100 | pushq %rbx |
3101 | .cfi_adjust_cfa_offset 8 |
3102 | .cfi_offset %rbx,-24 |
3103 | pushq %r12 |
3104 | .cfi_adjust_cfa_offset 8 |
3105 | .cfi_offset %r12,-32 |
3106 | pushq %r13 |
3107 | .cfi_adjust_cfa_offset 8 |
3108 | .cfi_offset %r13,-40 |
3109 | pushq %r14 |
3110 | .cfi_adjust_cfa_offset 8 |
3111 | .cfi_offset %r14,-48 |
3112 | pushq %r15 |
3113 | .cfi_adjust_cfa_offset 8 |
3114 | .cfi_offset %r15,-56 |
3115 | subq $480+8,%rsp |
3116 | .cfi_adjust_cfa_offset 32*15+8 |
3117 | .Ladd_affineq_body: |
3118 | |
3119 | movdqu 0(%rsi),%xmm0 |
3120 | movq %rdx,%rbx |
3121 | movdqu 16(%rsi),%xmm1 |
3122 | movdqu 32(%rsi),%xmm2 |
3123 | movdqu 48(%rsi),%xmm3 |
3124 | movdqu 64(%rsi),%xmm4 |
3125 | movdqu 80(%rsi),%xmm5 |
3126 | movq 64+0(%rsi),%rax |
3127 | movq 64+8(%rsi),%r14 |
3128 | movq 64+16(%rsi),%r15 |
3129 | movq 64+24(%rsi),%r8 |
3130 | movdqa %xmm0,320(%rsp) |
3131 | movdqa %xmm1,320+16(%rsp) |
3132 | movdqa %xmm2,352(%rsp) |
3133 | movdqa %xmm3,352+16(%rsp) |
3134 | movdqa %xmm4,384(%rsp) |
3135 | movdqa %xmm5,384+16(%rsp) |
3136 | por %xmm4,%xmm5 |
3137 | |
3138 | movdqu 0(%rbx),%xmm0 |
3139 | pshufd $0xb1,%xmm5,%xmm3 |
3140 | movdqu 16(%rbx),%xmm1 |
3141 | movdqu 32(%rbx),%xmm2 |
3142 | por %xmm3,%xmm5 |
3143 | movdqu 48(%rbx),%xmm3 |
3144 | movdqa %xmm0,416(%rsp) |
3145 | pshufd $0x1e,%xmm5,%xmm4 |
3146 | movdqa %xmm1,416+16(%rsp) |
3147 | por %xmm0,%xmm1 |
3148 | .byte 102,72,15,110,199 |
3149 | movdqa %xmm2,448(%rsp) |
3150 | movdqa %xmm3,448+16(%rsp) |
3151 | por %xmm2,%xmm3 |
3152 | por %xmm4,%xmm5 |
3153 | pxor %xmm4,%xmm4 |
3154 | por %xmm1,%xmm3 |
3155 | |
3156 | leaq 64-0(%rsi),%rsi |
3157 | leaq 32(%rsp),%rdi |
3158 | call __ecp_nistz256_sqr_montq |
3159 | |
3160 | pcmpeqd %xmm4,%xmm5 |
3161 | pshufd $0xb1,%xmm3,%xmm4 |
3162 | movq 0(%rbx),%rax |
3163 | |
3164 | movq %r12,%r9 |
3165 | por %xmm3,%xmm4 |
3166 | pshufd $0,%xmm5,%xmm5 |
3167 | pshufd $0x1e,%xmm4,%xmm3 |
3168 | movq %r13,%r10 |
3169 | por %xmm3,%xmm4 |
3170 | pxor %xmm3,%xmm3 |
3171 | movq %r14,%r11 |
3172 | pcmpeqd %xmm3,%xmm4 |
3173 | pshufd $0,%xmm4,%xmm4 |
3174 | |
3175 | leaq 32-0(%rsp),%rsi |
3176 | movq %r15,%r12 |
3177 | leaq 0(%rsp),%rdi |
3178 | call __ecp_nistz256_mul_montq |
3179 | |
3180 | leaq 320(%rsp),%rbx |
3181 | leaq 64(%rsp),%rdi |
3182 | call __ecp_nistz256_sub_fromq |
3183 | |
3184 | movq 384(%rsp),%rax |
3185 | leaq 384(%rsp),%rbx |
3186 | movq 0+32(%rsp),%r9 |
3187 | movq 8+32(%rsp),%r10 |
3188 | leaq 0+32(%rsp),%rsi |
3189 | movq 16+32(%rsp),%r11 |
3190 | movq 24+32(%rsp),%r12 |
3191 | leaq 32(%rsp),%rdi |
3192 | call __ecp_nistz256_mul_montq |
3193 | |
3194 | movq 384(%rsp),%rax |
3195 | leaq 384(%rsp),%rbx |
3196 | movq 0+64(%rsp),%r9 |
3197 | movq 8+64(%rsp),%r10 |
3198 | leaq 0+64(%rsp),%rsi |
3199 | movq 16+64(%rsp),%r11 |
3200 | movq 24+64(%rsp),%r12 |
3201 | leaq 288(%rsp),%rdi |
3202 | call __ecp_nistz256_mul_montq |
3203 | |
3204 | movq 448(%rsp),%rax |
3205 | leaq 448(%rsp),%rbx |
3206 | movq 0+32(%rsp),%r9 |
3207 | movq 8+32(%rsp),%r10 |
3208 | leaq 0+32(%rsp),%rsi |
3209 | movq 16+32(%rsp),%r11 |
3210 | movq 24+32(%rsp),%r12 |
3211 | leaq 32(%rsp),%rdi |
3212 | call __ecp_nistz256_mul_montq |
3213 | |
3214 | leaq 352(%rsp),%rbx |
3215 | leaq 96(%rsp),%rdi |
3216 | call __ecp_nistz256_sub_fromq |
3217 | |
3218 | movq 0+64(%rsp),%rax |
3219 | movq 8+64(%rsp),%r14 |
3220 | leaq 0+64(%rsp),%rsi |
3221 | movq 16+64(%rsp),%r15 |
3222 | movq 24+64(%rsp),%r8 |
3223 | leaq 128(%rsp),%rdi |
3224 | call __ecp_nistz256_sqr_montq |
3225 | |
3226 | movq 0+96(%rsp),%rax |
3227 | movq 8+96(%rsp),%r14 |
3228 | leaq 0+96(%rsp),%rsi |
3229 | movq 16+96(%rsp),%r15 |
3230 | movq 24+96(%rsp),%r8 |
3231 | leaq 192(%rsp),%rdi |
3232 | call __ecp_nistz256_sqr_montq |
3233 | |
3234 | movq 128(%rsp),%rax |
3235 | leaq 128(%rsp),%rbx |
3236 | movq 0+64(%rsp),%r9 |
3237 | movq 8+64(%rsp),%r10 |
3238 | leaq 0+64(%rsp),%rsi |
3239 | movq 16+64(%rsp),%r11 |
3240 | movq 24+64(%rsp),%r12 |
3241 | leaq 160(%rsp),%rdi |
3242 | call __ecp_nistz256_mul_montq |
3243 | |
3244 | movq 320(%rsp),%rax |
3245 | leaq 320(%rsp),%rbx |
3246 | movq 0+128(%rsp),%r9 |
3247 | movq 8+128(%rsp),%r10 |
3248 | leaq 0+128(%rsp),%rsi |
3249 | movq 16+128(%rsp),%r11 |
3250 | movq 24+128(%rsp),%r12 |
3251 | leaq 0(%rsp),%rdi |
3252 | call __ecp_nistz256_mul_montq |
3253 | |
3254 | |
3255 | |
3256 | |
3257 | xorq %r11,%r11 |
3258 | addq %r12,%r12 |
3259 | leaq 192(%rsp),%rsi |
3260 | adcq %r13,%r13 |
3261 | movq %r12,%rax |
3262 | adcq %r8,%r8 |
3263 | adcq %r9,%r9 |
3264 | movq %r13,%rbp |
3265 | adcq $0,%r11 |
3266 | |
3267 | subq $-1,%r12 |
3268 | movq %r8,%rcx |
3269 | sbbq %r14,%r13 |
3270 | sbbq $0,%r8 |
3271 | movq %r9,%r10 |
3272 | sbbq %r15,%r9 |
3273 | sbbq $0,%r11 |
3274 | |
3275 | cmovcq %rax,%r12 |
3276 | movq 0(%rsi),%rax |
3277 | cmovcq %rbp,%r13 |
3278 | movq 8(%rsi),%rbp |
3279 | cmovcq %rcx,%r8 |
3280 | movq 16(%rsi),%rcx |
3281 | cmovcq %r10,%r9 |
3282 | movq 24(%rsi),%r10 |
3283 | |
3284 | call __ecp_nistz256_subq |
3285 | |
3286 | leaq 160(%rsp),%rbx |
3287 | leaq 224(%rsp),%rdi |
3288 | call __ecp_nistz256_sub_fromq |
3289 | |
3290 | movq 0+0(%rsp),%rax |
3291 | movq 0+8(%rsp),%rbp |
3292 | movq 0+16(%rsp),%rcx |
3293 | movq 0+24(%rsp),%r10 |
3294 | leaq 64(%rsp),%rdi |
3295 | |
3296 | call __ecp_nistz256_subq |
3297 | |
3298 | movq %r12,0(%rdi) |
3299 | movq %r13,8(%rdi) |
3300 | movq %r8,16(%rdi) |
3301 | movq %r9,24(%rdi) |
3302 | movq 352(%rsp),%rax |
3303 | leaq 352(%rsp),%rbx |
3304 | movq 0+160(%rsp),%r9 |
3305 | movq 8+160(%rsp),%r10 |
3306 | leaq 0+160(%rsp),%rsi |
3307 | movq 16+160(%rsp),%r11 |
3308 | movq 24+160(%rsp),%r12 |
3309 | leaq 32(%rsp),%rdi |
3310 | call __ecp_nistz256_mul_montq |
3311 | |
3312 | movq 96(%rsp),%rax |
3313 | leaq 96(%rsp),%rbx |
3314 | movq 0+64(%rsp),%r9 |
3315 | movq 8+64(%rsp),%r10 |
3316 | leaq 0+64(%rsp),%rsi |
3317 | movq 16+64(%rsp),%r11 |
3318 | movq 24+64(%rsp),%r12 |
3319 | leaq 64(%rsp),%rdi |
3320 | call __ecp_nistz256_mul_montq |
3321 | |
3322 | leaq 32(%rsp),%rbx |
3323 | leaq 256(%rsp),%rdi |
3324 | call __ecp_nistz256_sub_fromq |
3325 | |
3326 | .byte 102,72,15,126,199 |
3327 | |
3328 | movdqa %xmm5,%xmm0 |
3329 | movdqa %xmm5,%xmm1 |
3330 | pandn 288(%rsp),%xmm0 |
3331 | movdqa %xmm5,%xmm2 |
3332 | pandn 288+16(%rsp),%xmm1 |
3333 | movdqa %xmm5,%xmm3 |
3334 | pand .LONE_mont(%rip),%xmm2 |
3335 | pand .LONE_mont+16(%rip),%xmm3 |
3336 | por %xmm0,%xmm2 |
3337 | por %xmm1,%xmm3 |
3338 | |
3339 | movdqa %xmm4,%xmm0 |
3340 | movdqa %xmm4,%xmm1 |
3341 | pandn %xmm2,%xmm0 |
3342 | movdqa %xmm4,%xmm2 |
3343 | pandn %xmm3,%xmm1 |
3344 | movdqa %xmm4,%xmm3 |
3345 | pand 384(%rsp),%xmm2 |
3346 | pand 384+16(%rsp),%xmm3 |
3347 | por %xmm0,%xmm2 |
3348 | por %xmm1,%xmm3 |
3349 | movdqu %xmm2,64(%rdi) |
3350 | movdqu %xmm3,80(%rdi) |
3351 | |
3352 | movdqa %xmm5,%xmm0 |
3353 | movdqa %xmm5,%xmm1 |
3354 | pandn 224(%rsp),%xmm0 |
3355 | movdqa %xmm5,%xmm2 |
3356 | pandn 224+16(%rsp),%xmm1 |
3357 | movdqa %xmm5,%xmm3 |
3358 | pand 416(%rsp),%xmm2 |
3359 | pand 416+16(%rsp),%xmm3 |
3360 | por %xmm0,%xmm2 |
3361 | por %xmm1,%xmm3 |
3362 | |
3363 | movdqa %xmm4,%xmm0 |
3364 | movdqa %xmm4,%xmm1 |
3365 | pandn %xmm2,%xmm0 |
3366 | movdqa %xmm4,%xmm2 |
3367 | pandn %xmm3,%xmm1 |
3368 | movdqa %xmm4,%xmm3 |
3369 | pand 320(%rsp),%xmm2 |
3370 | pand 320+16(%rsp),%xmm3 |
3371 | por %xmm0,%xmm2 |
3372 | por %xmm1,%xmm3 |
3373 | movdqu %xmm2,0(%rdi) |
3374 | movdqu %xmm3,16(%rdi) |
3375 | |
3376 | movdqa %xmm5,%xmm0 |
3377 | movdqa %xmm5,%xmm1 |
3378 | pandn 256(%rsp),%xmm0 |
3379 | movdqa %xmm5,%xmm2 |
3380 | pandn 256+16(%rsp),%xmm1 |
3381 | movdqa %xmm5,%xmm3 |
3382 | pand 448(%rsp),%xmm2 |
3383 | pand 448+16(%rsp),%xmm3 |
3384 | por %xmm0,%xmm2 |
3385 | por %xmm1,%xmm3 |
3386 | |
3387 | movdqa %xmm4,%xmm0 |
3388 | movdqa %xmm4,%xmm1 |
3389 | pandn %xmm2,%xmm0 |
3390 | movdqa %xmm4,%xmm2 |
3391 | pandn %xmm3,%xmm1 |
3392 | movdqa %xmm4,%xmm3 |
3393 | pand 352(%rsp),%xmm2 |
3394 | pand 352+16(%rsp),%xmm3 |
3395 | por %xmm0,%xmm2 |
3396 | por %xmm1,%xmm3 |
3397 | movdqu %xmm2,32(%rdi) |
3398 | movdqu %xmm3,48(%rdi) |
3399 | |
3400 | leaq 480+56(%rsp),%rsi |
3401 | .cfi_def_cfa %rsi,8 |
3402 | movq -48(%rsi),%r15 |
3403 | .cfi_restore %r15 |
3404 | movq -40(%rsi),%r14 |
3405 | .cfi_restore %r14 |
3406 | movq -32(%rsi),%r13 |
3407 | .cfi_restore %r13 |
3408 | movq -24(%rsi),%r12 |
3409 | .cfi_restore %r12 |
3410 | movq -16(%rsi),%rbx |
3411 | .cfi_restore %rbx |
3412 | movq -8(%rsi),%rbp |
3413 | .cfi_restore %rbp |
3414 | leaq (%rsi),%rsp |
3415 | .cfi_def_cfa_register %rsp |
3416 | .Ladd_affineq_epilogue: |
3417 | .byte 0xf3,0xc3 |
3418 | .cfi_endproc |
3419 | .size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine |
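// The *x subroutines below mirror the *q ones above using BMI2/ADX
// arithmetic (mulx/adcx/adox in the mul/sqr helpers) and are reached only
// via the OPENSSL_ia32cap_P dispatch. The -128 biases applied to %rsi
// match the +128 displacements used inside __ecp_nistz256_mul_montx and
// __ecp_nistz256_sqr_montx.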
3420 | .type __ecp_nistz256_add_tox,@function |
3421 | .align 32 |
3422 | __ecp_nistz256_add_tox: |
3423 | .cfi_startproc |
3424 | xorq %r11,%r11 |
3425 | adcq 0(%rbx),%r12 |
3426 | adcq 8(%rbx),%r13 |
3427 | movq %r12,%rax |
3428 | adcq 16(%rbx),%r8 |
3429 | adcq 24(%rbx),%r9 |
3430 | movq %r13,%rbp |
3431 | adcq $0,%r11 |
3432 | |
3433 | xorq %r10,%r10 |
3434 | sbbq $-1,%r12 |
3435 | movq %r8,%rcx |
3436 | sbbq %r14,%r13 |
3437 | sbbq $0,%r8 |
3438 | movq %r9,%r10 |
3439 | sbbq %r15,%r9 |
3440 | sbbq $0,%r11 |
3441 | |
3442 | cmovcq %rax,%r12 |
3443 | cmovcq %rbp,%r13 |
3444 | movq %r12,0(%rdi) |
3445 | cmovcq %rcx,%r8 |
3446 | movq %r13,8(%rdi) |
3447 | cmovcq %r10,%r9 |
3448 | movq %r8,16(%rdi) |
3449 | movq %r9,24(%rdi) |
3450 | |
3451 | .byte 0xf3,0xc3 |
3452 | .cfi_endproc |
3453 | .size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox |
3454 | |
3455 | .type __ecp_nistz256_sub_fromx,@function |
3456 | .align 32 |
3457 | __ecp_nistz256_sub_fromx: |
3458 | .cfi_startproc |
3459 | xorq %r11,%r11 |
3460 | sbbq 0(%rbx),%r12 |
3461 | sbbq 8(%rbx),%r13 |
3462 | movq %r12,%rax |
3463 | sbbq 16(%rbx),%r8 |
3464 | sbbq 24(%rbx),%r9 |
3465 | movq %r13,%rbp |
3466 | sbbq $0,%r11 |
3467 | |
3468 | xorq %r10,%r10 |
3469 | adcq $-1,%r12 |
3470 | movq %r8,%rcx |
3471 | adcq %r14,%r13 |
3472 | adcq $0,%r8 |
3473 | movq %r9,%r10 |
3474 | adcq %r15,%r9 |
3475 | |
3476 | btq $0,%r11 |
3477 | cmovncq %rax,%r12 |
3478 | cmovncq %rbp,%r13 |
3479 | movq %r12,0(%rdi) |
3480 | cmovncq %rcx,%r8 |
3481 | movq %r13,8(%rdi) |
3482 | cmovncq %r10,%r9 |
3483 | movq %r8,16(%rdi) |
3484 | movq %r9,24(%rdi) |
3485 | |
3486 | .byte 0xf3,0xc3 |
3487 | .cfi_endproc |
3488 | .size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx |
3489 | |
3490 | .type __ecp_nistz256_subx,@function |
3491 | .align 32 |
3492 | __ecp_nistz256_subx: |
3493 | .cfi_startproc |
3494 | xorq %r11,%r11 |
3495 | sbbq %r12,%rax |
3496 | sbbq %r13,%rbp |
3497 | movq %rax,%r12 |
3498 | sbbq %r8,%rcx |
3499 | sbbq %r9,%r10 |
3500 | movq %rbp,%r13 |
3501 | sbbq $0,%r11 |
3502 | |
3503 | xorq %r9,%r9 |
3504 | adcq $-1,%rax |
3505 | movq %rcx,%r8 |
3506 | adcq %r14,%rbp |
3507 | adcq $0,%rcx |
3508 | movq %r10,%r9 |
3509 | adcq %r15,%r10 |
3510 | |
3511 | btq $0,%r11 |
3512 | cmovcq %rax,%r12 |
3513 | cmovcq %rbp,%r13 |
3514 | cmovcq %rcx,%r8 |
3515 | cmovcq %r10,%r9 |
3516 | |
3517 | .byte 0xf3,0xc3 |
3518 | .cfi_endproc |
3519 | .size __ecp_nistz256_subx,.-__ecp_nistz256_subx |
3520 | |
3521 | .type __ecp_nistz256_mul_by_2x,@function |
3522 | .align 32 |
3523 | __ecp_nistz256_mul_by_2x: |
3524 | .cfi_startproc |
3525 | xorq %r11,%r11 |
3526 | adcq %r12,%r12 |
3527 | adcq %r13,%r13 |
3528 | movq %r12,%rax |
3529 | adcq %r8,%r8 |
3530 | adcq %r9,%r9 |
3531 | movq %r13,%rbp |
3532 | adcq $0,%r11 |
3533 | |
3534 | xorq %r10,%r10 |
3535 | sbbq $-1,%r12 |
3536 | movq %r8,%rcx |
3537 | sbbq %r14,%r13 |
3538 | sbbq $0,%r8 |
3539 | movq %r9,%r10 |
3540 | sbbq %r15,%r9 |
3541 | sbbq $0,%r11 |
3542 | |
3543 | cmovcq %rax,%r12 |
3544 | cmovcq %rbp,%r13 |
3545 | movq %r12,0(%rdi) |
3546 | cmovcq %rcx,%r8 |
3547 | movq %r13,8(%rdi) |
3548 | cmovcq %r10,%r9 |
3549 | movq %r8,16(%rdi) |
3550 | movq %r9,24(%rdi) |
3551 | |
3552 | .byte 0xf3,0xc3 |
3553 | .cfi_endproc |
3554 | .size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x |
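// BMI2/ADX version of ecp_nistz256_point_double.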
3555 | .type ecp_nistz256_point_doublex,@function |
3556 | .align 32 |
3557 | ecp_nistz256_point_doublex: |
3558 | .cfi_startproc |
3559 | .Lpoint_doublex: |
3560 | pushq %rbp |
3561 | .cfi_adjust_cfa_offset 8 |
3562 | .cfi_offset %rbp,-16 |
3563 | pushq %rbx |
3564 | .cfi_adjust_cfa_offset 8 |
3565 | .cfi_offset %rbx,-24 |
3566 | pushq %r12 |
3567 | .cfi_adjust_cfa_offset 8 |
3568 | .cfi_offset %r12,-32 |
3569 | pushq %r13 |
3570 | .cfi_adjust_cfa_offset 8 |
3571 | .cfi_offset %r13,-40 |
3572 | pushq %r14 |
3573 | .cfi_adjust_cfa_offset 8 |
3574 | .cfi_offset %r14,-48 |
3575 | pushq %r15 |
3576 | .cfi_adjust_cfa_offset 8 |
3577 | .cfi_offset %r15,-56 |
3578 | subq $160+8,%rsp |
3579 | .cfi_adjust_cfa_offset 32*5+8 |
3580 | .Lpoint_doublex_body: |
3581 | |
3582 | .Lpoint_double_shortcutx: |
3583 | movdqu 0(%rsi),%xmm0 |
3584 | movq %rsi,%rbx |
3585 | movdqu 16(%rsi),%xmm1 |
3586 | movq 32+0(%rsi),%r12 |
3587 | movq 32+8(%rsi),%r13 |
3588 | movq 32+16(%rsi),%r8 |
3589 | movq 32+24(%rsi),%r9 |
3590 | movq .Lpoly+8(%rip),%r14 |
3591 | movq .Lpoly+24(%rip),%r15 |
3592 | movdqa %xmm0,96(%rsp) |
3593 | movdqa %xmm1,96+16(%rsp) |
3594 | leaq 32(%rdi),%r10 |
3595 | leaq 64(%rdi),%r11 |
3596 | .byte 102,72,15,110,199 |
3597 | .byte 102,73,15,110,202 |
3598 | .byte 102,73,15,110,211 |
3599 | |
3600 | leaq 0(%rsp),%rdi |
3601 | call __ecp_nistz256_mul_by_2x |
3602 | |
3603 | movq 64+0(%rsi),%rdx |
3604 | movq 64+8(%rsi),%r14 |
3605 | movq 64+16(%rsi),%r15 |
3606 | movq 64+24(%rsi),%r8 |
3607 | leaq 64-128(%rsi),%rsi |
3608 | leaq 64(%rsp),%rdi |
3609 | call __ecp_nistz256_sqr_montx |
3610 | |
3611 | movq 0+0(%rsp),%rdx |
3612 | movq 8+0(%rsp),%r14 |
3613 | leaq -128+0(%rsp),%rsi |
3614 | movq 16+0(%rsp),%r15 |
3615 | movq 24+0(%rsp),%r8 |
3616 | leaq 0(%rsp),%rdi |
3617 | call __ecp_nistz256_sqr_montx |
3618 | |
3619 | movq 32(%rbx),%rdx |
3620 | movq 64+0(%rbx),%r9 |
3621 | movq 64+8(%rbx),%r10 |
3622 | movq 64+16(%rbx),%r11 |
3623 | movq 64+24(%rbx),%r12 |
3624 | leaq 64-128(%rbx),%rsi |
3625 | leaq 32(%rbx),%rbx |
3626 | .byte 102,72,15,126,215 |
3627 | call __ecp_nistz256_mul_montx |
3628 | call __ecp_nistz256_mul_by_2x |
3629 | |
3630 | movq 96+0(%rsp),%r12 |
3631 | movq 96+8(%rsp),%r13 |
3632 | leaq 64(%rsp),%rbx |
3633 | movq 96+16(%rsp),%r8 |
3634 | movq 96+24(%rsp),%r9 |
3635 | leaq 32(%rsp),%rdi |
3636 | call __ecp_nistz256_add_tox |
3637 | |
3638 | movq 96+0(%rsp),%r12 |
3639 | movq 96+8(%rsp),%r13 |
3640 | leaq 64(%rsp),%rbx |
3641 | movq 96+16(%rsp),%r8 |
3642 | movq 96+24(%rsp),%r9 |
3643 | leaq 64(%rsp),%rdi |
3644 | call __ecp_nistz256_sub_fromx |
3645 | |
3646 | movq 0+0(%rsp),%rdx |
3647 | movq 8+0(%rsp),%r14 |
3648 | leaq -128+0(%rsp),%rsi |
3649 | movq 16+0(%rsp),%r15 |
3650 | movq 24+0(%rsp),%r8 |
3651 | .byte 102,72,15,126,207 |
3652 | call __ecp_nistz256_sqr_montx |
3653 | xorq %r9,%r9 |
3654 | movq %r12,%rax |
3655 | addq $-1,%r12 |
3656 | movq %r13,%r10 |
3657 | adcq %rsi,%r13 |
3658 | movq %r14,%rcx |
3659 | adcq $0,%r14 |
3660 | movq %r15,%r8 |
3661 | adcq %rbp,%r15 |
3662 | adcq $0,%r9 |
3663 | xorq %rsi,%rsi |
3664 | testq $1,%rax |
3665 | |
3666 | cmovzq %rax,%r12 |
3667 | cmovzq %r10,%r13 |
3668 | cmovzq %rcx,%r14 |
3669 | cmovzq %r8,%r15 |
3670 | cmovzq %rsi,%r9 |
3671 | |
3672 | movq %r13,%rax |
3673 | shrq $1,%r12 |
3674 | shlq $63,%rax |
3675 | movq %r14,%r10 |
3676 | shrq $1,%r13 |
3677 | orq %rax,%r12 |
3678 | shlq $63,%r10 |
3679 | movq %r15,%rcx |
3680 | shrq $1,%r14 |
3681 | orq %r10,%r13 |
3682 | shlq $63,%rcx |
3683 | movq %r12,0(%rdi) |
3684 | shrq $1,%r15 |
3685 | movq %r13,8(%rdi) |
3686 | shlq $63,%r9 |
3687 | orq %rcx,%r14 |
3688 | orq %r9,%r15 |
3689 | movq %r14,16(%rdi) |
3690 | movq %r15,24(%rdi) |
3691 | movq 64(%rsp),%rdx |
3692 | leaq 64(%rsp),%rbx |
3693 | movq 0+32(%rsp),%r9 |
3694 | movq 8+32(%rsp),%r10 |
3695 | leaq -128+32(%rsp),%rsi |
3696 | movq 16+32(%rsp),%r11 |
3697 | movq 24+32(%rsp),%r12 |
3698 | leaq 32(%rsp),%rdi |
3699 | call __ecp_nistz256_mul_montx |
3700 | |
3701 | leaq 128(%rsp),%rdi |
3702 | call __ecp_nistz256_mul_by_2x |
3703 | |
3704 | leaq 32(%rsp),%rbx |
3705 | leaq 32(%rsp),%rdi |
3706 | call __ecp_nistz256_add_tox |
3707 | |
3708 | movq 96(%rsp),%rdx |
3709 | leaq 96(%rsp),%rbx |
3710 | movq 0+0(%rsp),%r9 |
3711 | movq 8+0(%rsp),%r10 |
3712 | leaq -128+0(%rsp),%rsi |
3713 | movq 16+0(%rsp),%r11 |
3714 | movq 24+0(%rsp),%r12 |
3715 | leaq 0(%rsp),%rdi |
3716 | call __ecp_nistz256_mul_montx |
3717 | |
3718 | leaq 128(%rsp),%rdi |
3719 | call __ecp_nistz256_mul_by_2x |
3720 | |
3721 | movq 0+32(%rsp),%rdx |
3722 | movq 8+32(%rsp),%r14 |
3723 | leaq -128+32(%rsp),%rsi |
3724 | movq 16+32(%rsp),%r15 |
3725 | movq 24+32(%rsp),%r8 |
3726 | .byte 102,72,15,126,199 |
3727 | call __ecp_nistz256_sqr_montx |
3728 | |
3729 | leaq 128(%rsp),%rbx |
3730 | movq %r14,%r8 |
3731 | movq %r15,%r9 |
3732 | movq %rsi,%r14 |
3733 | movq %rbp,%r15 |
3734 | call __ecp_nistz256_sub_fromx |
3735 | |
3736 | movq 0+0(%rsp),%rax |
3737 | movq 0+8(%rsp),%rbp |
3738 | movq 0+16(%rsp),%rcx |
3739 | movq 0+24(%rsp),%r10 |
3740 | leaq 0(%rsp),%rdi |
3741 | call __ecp_nistz256_subx |
3742 | |
3743 | movq 32(%rsp),%rdx |
3744 | leaq 32(%rsp),%rbx |
3745 | movq %r12,%r14 |
3746 | xorl %ecx,%ecx |
3747 | movq %r12,0+0(%rsp) |
3748 | movq %r13,%r10 |
3749 | movq %r13,0+8(%rsp) |
3750 | cmovzq %r8,%r11 |
3751 | movq %r8,0+16(%rsp) |
3752 | leaq 0-128(%rsp),%rsi |
3753 | cmovzq %r9,%r12 |
3754 | movq %r9,0+24(%rsp) |
3755 | movq %r14,%r9 |
3756 | leaq 0(%rsp),%rdi |
3757 | call __ecp_nistz256_mul_montx |
3758 | |
3759 | .byte 102,72,15,126,203 |
3760 | .byte 102,72,15,126,207 |
3761 | call __ecp_nistz256_sub_fromx |
3762 | |
3763 | leaq 160+56(%rsp),%rsi |
3764 | .cfi_def_cfa %rsi,8 |
3765 | movq -48(%rsi),%r15 |
3766 | .cfi_restore %r15 |
3767 | movq -40(%rsi),%r14 |
3768 | .cfi_restore %r14 |
3769 | movq -32(%rsi),%r13 |
3770 | .cfi_restore %r13 |
3771 | movq -24(%rsi),%r12 |
3772 | .cfi_restore %r12 |
3773 | movq -16(%rsi),%rbx |
3774 | .cfi_restore %rbx |
3775 | movq -8(%rsi),%rbp |
3776 | .cfi_restore %rbp |
3777 | leaq (%rsi),%rsp |
3778 | .cfi_def_cfa_register %rsp |
3779 | .Lpoint_doublex_epilogue: |
3780 | .byte 0xf3,0xc3 |
3781 | .cfi_endproc |
3782 | .size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex |
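// BMI2/ADX version of ecp_nistz256_point_add.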
3783 | .type ecp_nistz256_point_addx,@function |
3784 | .align 32 |
3785 | ecp_nistz256_point_addx: |
3786 | .cfi_startproc |
3787 | .Lpoint_addx: |
3788 | pushq %rbp |
3789 | .cfi_adjust_cfa_offset 8 |
3790 | .cfi_offset %rbp,-16 |
3791 | pushq %rbx |
3792 | .cfi_adjust_cfa_offset 8 |
3793 | .cfi_offset %rbx,-24 |
3794 | pushq %r12 |
3795 | .cfi_adjust_cfa_offset 8 |
3796 | .cfi_offset %r12,-32 |
3797 | pushq %r13 |
3798 | .cfi_adjust_cfa_offset 8 |
3799 | .cfi_offset %r13,-40 |
3800 | pushq %r14 |
3801 | .cfi_adjust_cfa_offset 8 |
3802 | .cfi_offset %r14,-48 |
3803 | pushq %r15 |
3804 | .cfi_adjust_cfa_offset 8 |
3805 | .cfi_offset %r15,-56 |
3806 | subq $576+8,%rsp |
3807 | .cfi_adjust_cfa_offset 32*18+8 |
3808 | .Lpoint_addx_body: |
3809 | |
3810 | movdqu 0(%rsi),%xmm0 |
3811 | movdqu 16(%rsi),%xmm1 |
3812 | movdqu 32(%rsi),%xmm2 |
3813 | movdqu 48(%rsi),%xmm3 |
3814 | movdqu 64(%rsi),%xmm4 |
3815 | movdqu 80(%rsi),%xmm5 |
3816 | movq %rsi,%rbx |
3817 | movq %rdx,%rsi |
3818 | movdqa %xmm0,384(%rsp) |
3819 | movdqa %xmm1,384+16(%rsp) |
3820 | movdqa %xmm2,416(%rsp) |
3821 | movdqa %xmm3,416+16(%rsp) |
3822 | movdqa %xmm4,448(%rsp) |
3823 | movdqa %xmm5,448+16(%rsp) |
3824 | por %xmm4,%xmm5 |
3825 | |
3826 | movdqu 0(%rsi),%xmm0 |
3827 | pshufd $0xb1,%xmm5,%xmm3 |
3828 | movdqu 16(%rsi),%xmm1 |
3829 | movdqu 32(%rsi),%xmm2 |
3830 | por %xmm3,%xmm5 |
3831 | movdqu 48(%rsi),%xmm3 |
3832 | movq 64+0(%rsi),%rdx |
3833 | movq 64+8(%rsi),%r14 |
3834 | movq 64+16(%rsi),%r15 |
3835 | movq 64+24(%rsi),%r8 |
3836 | movdqa %xmm0,480(%rsp) |
3837 | pshufd $0x1e,%xmm5,%xmm4 |
3838 | movdqa %xmm1,480+16(%rsp) |
3839 | movdqu 64(%rsi),%xmm0 |
3840 | movdqu 80(%rsi),%xmm1 |
3841 | movdqa %xmm2,512(%rsp) |
3842 | movdqa %xmm3,512+16(%rsp) |
3843 | por %xmm4,%xmm5 |
3844 | pxor %xmm4,%xmm4 |
3845 | por %xmm0,%xmm1 |
3846 | .byte 102,72,15,110,199 |
3847 | |
3848 | leaq 64-128(%rsi),%rsi |
3849 | movq %rdx,544+0(%rsp) |
3850 | movq %r14,544+8(%rsp) |
3851 | movq %r15,544+16(%rsp) |
3852 | movq %r8,544+24(%rsp) |
3853 | leaq 96(%rsp),%rdi |
3854 | call __ecp_nistz256_sqr_montx |
3855 | |
3856 | pcmpeqd %xmm4,%xmm5 |
3857 | pshufd $0xb1,%xmm1,%xmm4 |
3858 | por %xmm1,%xmm4 |
3859 | pshufd $0,%xmm5,%xmm5 |
3860 | pshufd $0x1e,%xmm4,%xmm3 |
3861 | por %xmm3,%xmm4 |
3862 | pxor %xmm3,%xmm3 |
3863 | pcmpeqd %xmm3,%xmm4 |
3864 | pshufd $0,%xmm4,%xmm4 |
3865 | movq 64+0(%rbx),%rdx |
3866 | movq 64+8(%rbx),%r14 |
3867 | movq 64+16(%rbx),%r15 |
3868 | movq 64+24(%rbx),%r8 |
3869 | .byte 102,72,15,110,203 |
3870 | |
3871 | leaq 64-128(%rbx),%rsi |
3872 | leaq 32(%rsp),%rdi |
3873 | call __ecp_nistz256_sqr_montx |
3874 | |
3875 | movq 544(%rsp),%rdx |
3876 | leaq 544(%rsp),%rbx |
3877 | movq 0+96(%rsp),%r9 |
3878 | movq 8+96(%rsp),%r10 |
3879 | leaq -128+96(%rsp),%rsi |
3880 | movq 16+96(%rsp),%r11 |
3881 | movq 24+96(%rsp),%r12 |
3882 | leaq 224(%rsp),%rdi |
3883 | call __ecp_nistz256_mul_montx |
3884 | |
3885 | movq 448(%rsp),%rdx |
3886 | leaq 448(%rsp),%rbx |
3887 | movq 0+32(%rsp),%r9 |
3888 | movq 8+32(%rsp),%r10 |
3889 | leaq -128+32(%rsp),%rsi |
3890 | movq 16+32(%rsp),%r11 |
3891 | movq 24+32(%rsp),%r12 |
3892 | leaq 256(%rsp),%rdi |
3893 | call __ecp_nistz256_mul_montx |
3894 | |
3895 | movq 416(%rsp),%rdx |
3896 | leaq 416(%rsp),%rbx |
3897 | movq 0+224(%rsp),%r9 |
3898 | movq 8+224(%rsp),%r10 |
3899 | leaq -128+224(%rsp),%rsi |
3900 | movq 16+224(%rsp),%r11 |
3901 | movq 24+224(%rsp),%r12 |
3902 | leaq 224(%rsp),%rdi |
3903 | call __ecp_nistz256_mul_montx |
3904 | |
3905 | movq 512(%rsp),%rdx |
3906 | leaq 512(%rsp),%rbx |
3907 | movq 0+256(%rsp),%r9 |
3908 | movq 8+256(%rsp),%r10 |
3909 | leaq -128+256(%rsp),%rsi |
3910 | movq 16+256(%rsp),%r11 |
3911 | movq 24+256(%rsp),%r12 |
3912 | leaq 256(%rsp),%rdi |
3913 | call __ecp_nistz256_mul_montx |
3914 | |
3915 | leaq 224(%rsp),%rbx |
3916 | leaq 64(%rsp),%rdi |
3917 | call __ecp_nistz256_sub_fromx |
3918 | |
3919 | orq %r13,%r12 |
3920 | movdqa %xmm4,%xmm2 |
3921 | orq %r8,%r12 |
3922 | orq %r9,%r12 |
3923 | por %xmm5,%xmm2 |
3924 | .byte 102,73,15,110,220 |
3925 | |
3926 | movq 384(%rsp),%rdx |
3927 | leaq 384(%rsp),%rbx |
3928 | movq 0+96(%rsp),%r9 |
3929 | movq 8+96(%rsp),%r10 |
3930 | leaq -128+96(%rsp),%rsi |
3931 | movq 16+96(%rsp),%r11 |
3932 | movq 24+96(%rsp),%r12 |
3933 | leaq 160(%rsp),%rdi |
3934 | call __ecp_nistz256_mul_montx |
3935 | |
3936 | movq 480(%rsp),%rdx |
3937 | leaq 480(%rsp),%rbx |
3938 | movq 0+32(%rsp),%r9 |
3939 | movq 8+32(%rsp),%r10 |
3940 | leaq -128+32(%rsp),%rsi |
3941 | movq 16+32(%rsp),%r11 |
3942 | movq 24+32(%rsp),%r12 |
3943 | leaq 192(%rsp),%rdi |
3944 | call __ecp_nistz256_mul_montx |
3945 | |
3946 | leaq 160(%rsp),%rbx |
3947 | leaq 0(%rsp),%rdi |
3948 | call __ecp_nistz256_sub_fromx |
3949 | |
3950 | orq %r13,%r12 |
3951 | orq %r8,%r12 |
3952 | orq %r9,%r12 |
3953 | |
3954 | .byte 102,73,15,126,208 |
3955 | .byte 102,73,15,126,217 |
3956 | orq %r8,%r12 |
3957 | .byte 0x3e |
3958 | jnz .Ladd_proceedx |
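// Same a == +/-b and infinity dispatch as the q path above.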
3959 | |
3960 | |
3961 | |
3962 | testq %r9,%r9 |
3963 | jz .Ladd_doublex |
3964 | |
3970 | .byte 102,72,15,126,199 |
3971 | pxor %xmm0,%xmm0 |
3972 | movdqu %xmm0,0(%rdi) |
3973 | movdqu %xmm0,16(%rdi) |
3974 | movdqu %xmm0,32(%rdi) |
3975 | movdqu %xmm0,48(%rdi) |
3976 | movdqu %xmm0,64(%rdi) |
3977 | movdqu %xmm0,80(%rdi) |
3978 | jmp .Ladd_donex |
3979 | |
3980 | .align 32 |
3981 | .Ladd_doublex: |
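// Same frame-shrinking tail jump as .Ladd_doubleq.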
3982 | .byte 102,72,15,126,206 |
3983 | .byte 102,72,15,126,199 |
3984 | addq $416,%rsp |
3985 | .cfi_adjust_cfa_offset -416 |
3986 | jmp .Lpoint_double_shortcutx |
3987 | .cfi_adjust_cfa_offset 416 |
3988 | |
3989 | .align 32 |
3990 | .Ladd_proceedx: |
3991 | movq 0+64(%rsp),%rdx |
3992 | movq 8+64(%rsp),%r14 |
3993 | leaq -128+64(%rsp),%rsi |
3994 | movq 16+64(%rsp),%r15 |
3995 | movq 24+64(%rsp),%r8 |
3996 | leaq 96(%rsp),%rdi |
3997 | call __ecp_nistz256_sqr_montx |
3998 | |
3999 | movq 448(%rsp),%rdx |
4000 | leaq 448(%rsp),%rbx |
4001 | movq 0+0(%rsp),%r9 |
4002 | movq 8+0(%rsp),%r10 |
4003 | leaq -128+0(%rsp),%rsi |
4004 | movq 16+0(%rsp),%r11 |
4005 | movq 24+0(%rsp),%r12 |
4006 | leaq 352(%rsp),%rdi |
4007 | call __ecp_nistz256_mul_montx |
4008 | |
4009 | movq 0+0(%rsp),%rdx |
4010 | movq 8+0(%rsp),%r14 |
4011 | leaq -128+0(%rsp),%rsi |
4012 | movq 16+0(%rsp),%r15 |
4013 | movq 24+0(%rsp),%r8 |
4014 | leaq 32(%rsp),%rdi |
4015 | call __ecp_nistz256_sqr_montx |
4016 | |
4017 | movq 544(%rsp),%rdx |
4018 | leaq 544(%rsp),%rbx |
4019 | movq 0+352(%rsp),%r9 |
4020 | movq 8+352(%rsp),%r10 |
4021 | leaq -128+352(%rsp),%rsi |
4022 | movq 16+352(%rsp),%r11 |
4023 | movq 24+352(%rsp),%r12 |
4024 | leaq 352(%rsp),%rdi |
4025 | call __ecp_nistz256_mul_montx |
4026 | |
4027 | movq 0(%rsp),%rdx |
4028 | leaq 0(%rsp),%rbx |
4029 | movq 0+32(%rsp),%r9 |
4030 | movq 8+32(%rsp),%r10 |
4031 | leaq -128+32(%rsp),%rsi |
4032 | movq 16+32(%rsp),%r11 |
4033 | movq 24+32(%rsp),%r12 |
4034 | leaq 128(%rsp),%rdi |
4035 | call __ecp_nistz256_mul_montx |
4036 | |
4037 | movq 160(%rsp),%rdx |
4038 | leaq 160(%rsp),%rbx |
4039 | movq 0+32(%rsp),%r9 |
4040 | movq 8+32(%rsp),%r10 |
4041 | leaq -128+32(%rsp),%rsi |
4042 | movq 16+32(%rsp),%r11 |
4043 | movq 24+32(%rsp),%r12 |
4044 | leaq 192(%rsp),%rdi |
4045 | call __ecp_nistz256_mul_montx |
4046 | |
4047 | |
4048 | |
4049 | |
4050 | xorq %r11,%r11 |
4051 | addq %r12,%r12 |
4052 | leaq 96(%rsp),%rsi |
4053 | adcq %r13,%r13 |
4054 | movq %r12,%rax |
4055 | adcq %r8,%r8 |
4056 | adcq %r9,%r9 |
4057 | movq %r13,%rbp |
4058 | adcq $0,%r11 |
4059 | |
4060 | subq $-1,%r12 |
4061 | movq %r8,%rcx |
4062 | sbbq %r14,%r13 |
4063 | sbbq $0,%r8 |
4064 | movq %r9,%r10 |
4065 | sbbq %r15,%r9 |
4066 | sbbq $0,%r11 |
4067 | |
4068 | cmovcq %rax,%r12 |
4069 | movq 0(%rsi),%rax |
4070 | cmovcq %rbp,%r13 |
4071 | movq 8(%rsi),%rbp |
4072 | cmovcq %rcx,%r8 |
4073 | movq 16(%rsi),%rcx |
4074 | cmovcq %r10,%r9 |
4075 | movq 24(%rsi),%r10 |
4076 | |
4077 | call __ecp_nistz256_subx |
4078 | |
4079 | leaq 128(%rsp),%rbx |
4080 | leaq 288(%rsp),%rdi |
4081 | call __ecp_nistz256_sub_fromx |
4082 | |
4083 | movq 192+0(%rsp),%rax |
4084 | movq 192+8(%rsp),%rbp |
4085 | movq 192+16(%rsp),%rcx |
4086 | movq 192+24(%rsp),%r10 |
4087 | leaq 320(%rsp),%rdi |
4088 | |
4089 | call __ecp_nistz256_subx |
4090 | |
4091 | movq %r12,0(%rdi) |
4092 | movq %r13,8(%rdi) |
4093 | movq %r8,16(%rdi) |
4094 | movq %r9,24(%rdi) |
4095 | movq 128(%rsp),%rdx |
4096 | leaq 128(%rsp),%rbx |
4097 | movq 0+224(%rsp),%r9 |
4098 | movq 8+224(%rsp),%r10 |
4099 | leaq -128+224(%rsp),%rsi |
4100 | movq 16+224(%rsp),%r11 |
4101 | movq 24+224(%rsp),%r12 |
4102 | leaq 256(%rsp),%rdi |
4103 | call __ecp_nistz256_mul_montx |
4104 | |
4105 | movq 320(%rsp),%rdx |
4106 | leaq 320(%rsp),%rbx |
4107 | movq 0+64(%rsp),%r9 |
4108 | movq 8+64(%rsp),%r10 |
4109 | leaq -128+64(%rsp),%rsi |
4110 | movq 16+64(%rsp),%r11 |
4111 | movq 24+64(%rsp),%r12 |
4112 | leaq 320(%rsp),%rdi |
4113 | call __ecp_nistz256_mul_montx |
4114 | |
4115 | leaq 256(%rsp),%rbx |
4116 | leaq 320(%rsp),%rdi |
4117 | call __ecp_nistz256_sub_fromx |
4118 | |
4119 | .byte 102,72,15,126,199 |
4120 | |
4121 | movdqa %xmm5,%xmm0 |
4122 | movdqa %xmm5,%xmm1 |
4123 | pandn 352(%rsp),%xmm0 |
4124 | movdqa %xmm5,%xmm2 |
4125 | pandn 352+16(%rsp),%xmm1 |
4126 | movdqa %xmm5,%xmm3 |
4127 | pand 544(%rsp),%xmm2 |
4128 | pand 544+16(%rsp),%xmm3 |
4129 | por %xmm0,%xmm2 |
4130 | por %xmm1,%xmm3 |
4131 | |
4132 | movdqa %xmm4,%xmm0 |
4133 | movdqa %xmm4,%xmm1 |
4134 | pandn %xmm2,%xmm0 |
4135 | movdqa %xmm4,%xmm2 |
4136 | pandn %xmm3,%xmm1 |
4137 | movdqa %xmm4,%xmm3 |
4138 | pand 448(%rsp),%xmm2 |
4139 | pand 448+16(%rsp),%xmm3 |
4140 | por %xmm0,%xmm2 |
4141 | por %xmm1,%xmm3 |
4142 | movdqu %xmm2,64(%rdi) |
4143 | movdqu %xmm3,80(%rdi) |
4144 | |
4145 | movdqa %xmm5,%xmm0 |
4146 | movdqa %xmm5,%xmm1 |
4147 | pandn 288(%rsp),%xmm0 |
4148 | movdqa %xmm5,%xmm2 |
4149 | pandn 288+16(%rsp),%xmm1 |
4150 | movdqa %xmm5,%xmm3 |
4151 | pand 480(%rsp),%xmm2 |
4152 | pand 480+16(%rsp),%xmm3 |
4153 | por %xmm0,%xmm2 |
4154 | por %xmm1,%xmm3 |
4155 | |
4156 | movdqa %xmm4,%xmm0 |
4157 | movdqa %xmm4,%xmm1 |
4158 | pandn %xmm2,%xmm0 |
4159 | movdqa %xmm4,%xmm2 |
4160 | pandn %xmm3,%xmm1 |
4161 | movdqa %xmm4,%xmm3 |
4162 | pand 384(%rsp),%xmm2 |
4163 | pand 384+16(%rsp),%xmm3 |
4164 | por %xmm0,%xmm2 |
4165 | por %xmm1,%xmm3 |
4166 | movdqu %xmm2,0(%rdi) |
4167 | movdqu %xmm3,16(%rdi) |
4168 | |
4169 | movdqa %xmm5,%xmm0 |
4170 | movdqa %xmm5,%xmm1 |
4171 | pandn 320(%rsp),%xmm0 |
4172 | movdqa %xmm5,%xmm2 |
4173 | pandn 320+16(%rsp),%xmm1 |
4174 | movdqa %xmm5,%xmm3 |
4175 | pand 512(%rsp),%xmm2 |
4176 | pand 512+16(%rsp),%xmm3 |
4177 | por %xmm0,%xmm2 |
4178 | por %xmm1,%xmm3 |
4179 | |
4180 | movdqa %xmm4,%xmm0 |
4181 | movdqa %xmm4,%xmm1 |
4182 | pandn %xmm2,%xmm0 |
4183 | movdqa %xmm4,%xmm2 |
4184 | pandn %xmm3,%xmm1 |
4185 | movdqa %xmm4,%xmm3 |
4186 | pand 416(%rsp),%xmm2 |
4187 | pand 416+16(%rsp),%xmm3 |
4188 | por %xmm0,%xmm2 |
4189 | por %xmm1,%xmm3 |
4190 | movdqu %xmm2,32(%rdi) |
4191 | movdqu %xmm3,48(%rdi) |
4192 | |
4193 | .Ladd_donex: |
4194 | leaq 576+56(%rsp),%rsi |
4195 | .cfi_def_cfa %rsi,8 |
4196 | movq -48(%rsi),%r15 |
4197 | .cfi_restore %r15 |
4198 | movq -40(%rsi),%r14 |
4199 | .cfi_restore %r14 |
4200 | movq -32(%rsi),%r13 |
4201 | .cfi_restore %r13 |
4202 | movq -24(%rsi),%r12 |
4203 | .cfi_restore %r12 |
4204 | movq -16(%rsi),%rbx |
4205 | .cfi_restore %rbx |
4206 | movq -8(%rsi),%rbp |
4207 | .cfi_restore %rbp |
4208 | leaq (%rsi),%rsp |
4209 | .cfi_def_cfa_register %rsp |
4210 | .Lpoint_addx_epilogue: |
4211 | .byte 0xf3,0xc3 |
4212 | .cfi_endproc |
4213 | .size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx |
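// BMI2/ADX version of ecp_nistz256_point_add_affine.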
4214 | .type ecp_nistz256_point_add_affinex,@function |
4215 | .align 32 |
4216 | ecp_nistz256_point_add_affinex: |
4217 | .cfi_startproc |
4218 | .Lpoint_add_affinex: |
4219 | pushq %rbp |
4220 | .cfi_adjust_cfa_offset 8 |
4221 | .cfi_offset %rbp,-16 |
4222 | pushq %rbx |
4223 | .cfi_adjust_cfa_offset 8 |
4224 | .cfi_offset %rbx,-24 |
4225 | pushq %r12 |
4226 | .cfi_adjust_cfa_offset 8 |
4227 | .cfi_offset %r12,-32 |
4228 | pushq %r13 |
4229 | .cfi_adjust_cfa_offset 8 |
4230 | .cfi_offset %r13,-40 |
4231 | pushq %r14 |
4232 | .cfi_adjust_cfa_offset 8 |
4233 | .cfi_offset %r14,-48 |
4234 | pushq %r15 |
4235 | .cfi_adjust_cfa_offset 8 |
4236 | .cfi_offset %r15,-56 |
4237 | subq $480+8,%rsp |
4238 | .cfi_adjust_cfa_offset 32*15+8 |
4239 | .Ladd_affinex_body: |
4240 | |
4241 | movdqu 0(%rsi),%xmm0 |
4242 | movq %rdx,%rbx |
4243 | movdqu 16(%rsi),%xmm1 |
4244 | movdqu 32(%rsi),%xmm2 |
4245 | movdqu 48(%rsi),%xmm3 |
4246 | movdqu 64(%rsi),%xmm4 |
4247 | movdqu 80(%rsi),%xmm5 |
4248 | movq 64+0(%rsi),%rdx |
4249 | movq 64+8(%rsi),%r14 |
4250 | movq 64+16(%rsi),%r15 |
4251 | movq 64+24(%rsi),%r8 |
4252 | movdqa %xmm0,320(%rsp) |
4253 | movdqa %xmm1,320+16(%rsp) |
4254 | movdqa %xmm2,352(%rsp) |
4255 | movdqa %xmm3,352+16(%rsp) |
4256 | movdqa %xmm4,384(%rsp) |
4257 | movdqa %xmm5,384+16(%rsp) |
4258 | por %xmm4,%xmm5 |
4259 | |
4260 | movdqu 0(%rbx),%xmm0 |
4261 | pshufd $0xb1,%xmm5,%xmm3 |
4262 | movdqu 16(%rbx),%xmm1 |
4263 | movdqu 32(%rbx),%xmm2 |
4264 | por %xmm3,%xmm5 |
4265 | movdqu 48(%rbx),%xmm3 |
4266 | movdqa %xmm0,416(%rsp) |
4267 | pshufd $0x1e,%xmm5,%xmm4 |
4268 | movdqa %xmm1,416+16(%rsp) |
4269 | por %xmm0,%xmm1 |
4270 | .byte 102,72,15,110,199 |
4271 | movdqa %xmm2,448(%rsp) |
4272 | movdqa %xmm3,448+16(%rsp) |
4273 | por %xmm2,%xmm3 |
4274 | por %xmm4,%xmm5 |
4275 | pxor %xmm4,%xmm4 |
4276 | por %xmm1,%xmm3 |
4277 | |
4278 | leaq 64-128(%rsi),%rsi |
4279 | leaq 32(%rsp),%rdi |
4280 | call __ecp_nistz256_sqr_montx |
4281 | |
4282 | pcmpeqd %xmm4,%xmm5 |
4283 | pshufd $0xb1,%xmm3,%xmm4 |
4284 | movq 0(%rbx),%rdx |
4285 | |
4286 | movq %r12,%r9 |
4287 | por %xmm3,%xmm4 |
4288 | pshufd $0,%xmm5,%xmm5 |
4289 | pshufd $0x1e,%xmm4,%xmm3 |
4290 | movq %r13,%r10 |
4291 | por %xmm3,%xmm4 |
4292 | pxor %xmm3,%xmm3 |
4293 | movq %r14,%r11 |
4294 | pcmpeqd %xmm3,%xmm4 |
4295 | pshufd $0,%xmm4,%xmm4 |
4296 | |
4297 | leaq 32-128(%rsp),%rsi |
4298 | movq %r15,%r12 |
4299 | leaq 0(%rsp),%rdi |
4300 | call __ecp_nistz256_mul_montx |
4301 | |
4302 | leaq 320(%rsp),%rbx |
4303 | leaq 64(%rsp),%rdi |
4304 | call __ecp_nistz256_sub_fromx |
4305 | |
4306 | movq 384(%rsp),%rdx |
4307 | leaq 384(%rsp),%rbx |
4308 | movq 0+32(%rsp),%r9 |
4309 | movq 8+32(%rsp),%r10 |
4310 | leaq -128+32(%rsp),%rsi |
4311 | movq 16+32(%rsp),%r11 |
4312 | movq 24+32(%rsp),%r12 |
4313 | leaq 32(%rsp),%rdi |
4314 | call __ecp_nistz256_mul_montx |
4315 | |
4316 | movq 384(%rsp),%rdx |
4317 | leaq 384(%rsp),%rbx |
4318 | movq 0+64(%rsp),%r9 |
4319 | movq 8+64(%rsp),%r10 |
4320 | leaq -128+64(%rsp),%rsi |
4321 | movq 16+64(%rsp),%r11 |
4322 | movq 24+64(%rsp),%r12 |
4323 | leaq 288(%rsp),%rdi |
4324 | call __ecp_nistz256_mul_montx |
4325 | |
4326 | movq 448(%rsp),%rdx |
4327 | leaq 448(%rsp),%rbx |
4328 | movq 0+32(%rsp),%r9 |
4329 | movq 8+32(%rsp),%r10 |
4330 | leaq -128+32(%rsp),%rsi |
4331 | movq 16+32(%rsp),%r11 |
4332 | movq 24+32(%rsp),%r12 |
4333 | leaq 32(%rsp),%rdi |
4334 | call __ecp_nistz256_mul_montx |
4335 | |
4336 | leaq 352(%rsp),%rbx |
4337 | leaq 96(%rsp),%rdi |
4338 | call __ecp_nistz256_sub_fromx |
4339 | |
4340 | movq 0+64(%rsp),%rdx |
4341 | movq 8+64(%rsp),%r14 |
4342 | leaq -128+64(%rsp),%rsi |
4343 | movq 16+64(%rsp),%r15 |
4344 | movq 24+64(%rsp),%r8 |
4345 | leaq 128(%rsp),%rdi |
4346 | call __ecp_nistz256_sqr_montx |
4347 | |
4348 | movq 0+96(%rsp),%rdx |
4349 | movq 8+96(%rsp),%r14 |
4350 | leaq -128+96(%rsp),%rsi |
4351 | movq 16+96(%rsp),%r15 |
4352 | movq 24+96(%rsp),%r8 |
4353 | leaq 192(%rsp),%rdi |
4354 | call __ecp_nistz256_sqr_montx |
4355 | |
4356 | movq 128(%rsp),%rdx |
4357 | leaq 128(%rsp),%rbx |
4358 | movq 0+64(%rsp),%r9 |
4359 | movq 8+64(%rsp),%r10 |
4360 | leaq -128+64(%rsp),%rsi |
4361 | movq 16+64(%rsp),%r11 |
4362 | movq 24+64(%rsp),%r12 |
4363 | leaq 160(%rsp),%rdi |
4364 | call __ecp_nistz256_mul_montx |
4365 | |
4366 | movq 320(%rsp),%rdx |
4367 | leaq 320(%rsp),%rbx |
4368 | movq 0+128(%rsp),%r9 |
4369 | movq 8+128(%rsp),%r10 |
4370 | leaq -128+128(%rsp),%rsi |
4371 | movq 16+128(%rsp),%r11 |
4372 | movq 24+128(%rsp),%r12 |
4373 | leaq 0(%rsp),%rdi |
4374 | call __ecp_nistz256_mul_montx |
4375 | |
4376 | |
4377 | |
4378 | |
4379 | xorq %r11,%r11 |
4380 | addq %r12,%r12 |
4381 | leaq 192(%rsp),%rsi |
4382 | adcq %r13,%r13 |
4383 | movq %r12,%rax |
4384 | adcq %r8,%r8 |
4385 | adcq %r9,%r9 |
4386 | movq %r13,%rbp |
4387 | adcq $0,%r11 |
4388 | |
4389 | subq $-1,%r12 |
4390 | movq %r8,%rcx |
4391 | sbbq %r14,%r13 |
4392 | sbbq $0,%r8 |
4393 | movq %r9,%r10 |
4394 | sbbq %r15,%r9 |
4395 | sbbq $0,%r11 |
4396 | |
4397 | cmovcq %rax,%r12 |
4398 | movq 0(%rsi),%rax |
4399 | cmovcq %rbp,%r13 |
4400 | movq 8(%rsi),%rbp |
4401 | cmovcq %rcx,%r8 |
4402 | movq 16(%rsi),%rcx |
4403 | cmovcq %r10,%r9 |
4404 | movq 24(%rsi),%r10 |
4405 | |
4406 | call __ecp_nistz256_subx |
4407 | |
4408 | leaq 160(%rsp),%rbx |
4409 | leaq 224(%rsp),%rdi |
4410 | call __ecp_nistz256_sub_fromx |
4411 | |
4412 | movq 0+0(%rsp),%rax |
4413 | movq 0+8(%rsp),%rbp |
4414 | movq 0+16(%rsp),%rcx |
4415 | movq 0+24(%rsp),%r10 |
4416 | leaq 64(%rsp),%rdi |
4417 | |
4418 | call __ecp_nistz256_subx |
4419 | |
4420 | movq %r12,0(%rdi) |
4421 | movq %r13,8(%rdi) |
4422 | movq %r8,16(%rdi) |
4423 | movq %r9,24(%rdi) |
4424 | movq 352(%rsp),%rdx |
4425 | leaq 352(%rsp),%rbx |
4426 | movq 0+160(%rsp),%r9 |
4427 | movq 8+160(%rsp),%r10 |
4428 | leaq -128+160(%rsp),%rsi |
4429 | movq 16+160(%rsp),%r11 |
4430 | movq 24+160(%rsp),%r12 |
4431 | leaq 32(%rsp),%rdi |
4432 | call __ecp_nistz256_mul_montx |
4433 | |
4434 | movq 96(%rsp),%rdx |
4435 | leaq 96(%rsp),%rbx |
4436 | movq 0+64(%rsp),%r9 |
4437 | movq 8+64(%rsp),%r10 |
4438 | leaq -128+64(%rsp),%rsi |
4439 | movq 16+64(%rsp),%r11 |
4440 | movq 24+64(%rsp),%r12 |
4441 | leaq 64(%rsp),%rdi |
4442 | call __ecp_nistz256_mul_montx |
4443 | |
4444 | leaq 32(%rsp),%rbx |
4445 | leaq 256(%rsp),%rdi |
4446 | call __ecp_nistz256_sub_fromx |
4447 | |
4448 | .byte 102,72,15,126,199 |
4449 | |
4450 | movdqa %xmm5,%xmm0 |
4451 | movdqa %xmm5,%xmm1 |
4452 | pandn 288(%rsp),%xmm0 |
4453 | movdqa %xmm5,%xmm2 |
4454 | pandn 288+16(%rsp),%xmm1 |
4455 | movdqa %xmm5,%xmm3 |
4456 | pand .LONE_mont(%rip),%xmm2 |
4457 | pand .LONE_mont+16(%rip),%xmm3 |
4458 | por %xmm0,%xmm2 |
4459 | por %xmm1,%xmm3 |
4460 | |
4461 | movdqa %xmm4,%xmm0 |
4462 | movdqa %xmm4,%xmm1 |
4463 | pandn %xmm2,%xmm0 |
4464 | movdqa %xmm4,%xmm2 |
4465 | pandn %xmm3,%xmm1 |
4466 | movdqa %xmm4,%xmm3 |
4467 | pand 384(%rsp),%xmm2 |
4468 | pand 384+16(%rsp),%xmm3 |
4469 | por %xmm0,%xmm2 |
4470 | por %xmm1,%xmm3 |
4471 | movdqu %xmm2,64(%rdi) |
4472 | movdqu %xmm3,80(%rdi) |
4473 | |
4474 | movdqa %xmm5,%xmm0 |
4475 | movdqa %xmm5,%xmm1 |
4476 | pandn 224(%rsp),%xmm0 |
4477 | movdqa %xmm5,%xmm2 |
4478 | pandn 224+16(%rsp),%xmm1 |
4479 | movdqa %xmm5,%xmm3 |
4480 | pand 416(%rsp),%xmm2 |
4481 | pand 416+16(%rsp),%xmm3 |
4482 | por %xmm0,%xmm2 |
4483 | por %xmm1,%xmm3 |
4484 | |
4485 | movdqa %xmm4,%xmm0 |
4486 | movdqa %xmm4,%xmm1 |
4487 | pandn %xmm2,%xmm0 |
4488 | movdqa %xmm4,%xmm2 |
4489 | pandn %xmm3,%xmm1 |
4490 | movdqa %xmm4,%xmm3 |
4491 | pand 320(%rsp),%xmm2 |
4492 | pand 320+16(%rsp),%xmm3 |
4493 | por %xmm0,%xmm2 |
4494 | por %xmm1,%xmm3 |
4495 | movdqu %xmm2,0(%rdi) |
4496 | movdqu %xmm3,16(%rdi) |
4497 | |
4498 | movdqa %xmm5,%xmm0 |
4499 | movdqa %xmm5,%xmm1 |
4500 | pandn 256(%rsp),%xmm0 |
4501 | movdqa %xmm5,%xmm2 |
4502 | pandn 256+16(%rsp),%xmm1 |
4503 | movdqa %xmm5,%xmm3 |
4504 | pand 448(%rsp),%xmm2 |
4505 | pand 448+16(%rsp),%xmm3 |
4506 | por %xmm0,%xmm2 |
4507 | por %xmm1,%xmm3 |
4508 | |
4509 | movdqa %xmm4,%xmm0 |
4510 | movdqa %xmm4,%xmm1 |
4511 | pandn %xmm2,%xmm0 |
4512 | movdqa %xmm4,%xmm2 |
4513 | pandn %xmm3,%xmm1 |
4514 | movdqa %xmm4,%xmm3 |
4515 | pand 352(%rsp),%xmm2 |
4516 | pand 352+16(%rsp),%xmm3 |
4517 | por %xmm0,%xmm2 |
4518 | por %xmm1,%xmm3 |
4519 | movdqu %xmm2,32(%rdi) |
4520 | movdqu %xmm3,48(%rdi) |
4521 | |
4522 | leaq 480+56(%rsp),%rsi |
4523 | .cfi_def_cfa %rsi,8 |
4524 | movq -48(%rsi),%r15 |
4525 | .cfi_restore %r15 |
4526 | movq -40(%rsi),%r14 |
4527 | .cfi_restore %r14 |
4528 | movq -32(%rsi),%r13 |
4529 | .cfi_restore %r13 |
4530 | movq -24(%rsi),%r12 |
4531 | .cfi_restore %r12 |
4532 | movq -16(%rsi),%rbx |
4533 | .cfi_restore %rbx |
4534 | movq -8(%rsi),%rbp |
4535 | .cfi_restore %rbp |
4536 | leaq (%rsi),%rsp |
4537 | .cfi_def_cfa_register %rsp |
4538 | .Ladd_affinex_epilogue: |
4539 | .byte 0xf3,0xc3 |
4540 | .cfi_endproc |
4541 | .size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex |
4542 | #endif |
4543 | |