# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

# MemorySanitizer cannot see stores performed by hand-written assembly, so
# fall back to the portable C implementation when building under MSan.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

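# void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
#
# Multiplies the hash value Xi (%rdi) by H in GF(2^128) using the 4-bit
# table-lookup method: Htable (%rsi) holds the 16 precomputed nibble
# multiples of H, and .Lrem_4bit supplies the reduction constants.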
.globl gcm_gmult_4bit
.hidden gcm_gmult_4bit
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
.cfi_startproc
 pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
 pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
 pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
 pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
 pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
 pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
 subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lgmult_prologue:

 movzbq 15(%rdi),%r8
 leaq .Lrem_4bit(%rip),%r11
 xorq %rax,%rax
 xorq %rbx,%rbx
 movb %r8b,%al
 movb %r8b,%bl
 shlb $4,%al
 movq $14,%rcx
 movq 8(%rsi,%rax,1),%r8
 movq (%rsi,%rax,1),%r9
 andb $0xf0,%bl
 movq %r8,%rdx
 jmp .Loop1

.align 16
.Loop1:
 shrq $4,%r8
 andq $0xf,%rdx
 movq %r9,%r10
 movb (%rdi,%rcx,1),%al
 shrq $4,%r9
 xorq 8(%rsi,%rbx,1),%r8
 shlq $60,%r10
 xorq (%rsi,%rbx,1),%r9
 movb %al,%bl
 xorq (%r11,%rdx,8),%r9
 movq %r8,%rdx
 shlb $4,%al
 xorq %r10,%r8
 decq %rcx
 js .Lbreak1

 shrq $4,%r8
 andq $0xf,%rdx
 movq %r9,%r10
 shrq $4,%r9
 xorq 8(%rsi,%rax,1),%r8
 shlq $60,%r10
 xorq (%rsi,%rax,1),%r9
 andb $0xf0,%bl
 xorq (%r11,%rdx,8),%r9
 movq %r8,%rdx
 xorq %r10,%r8
 jmp .Loop1

.align 16
.Lbreak1:
 shrq $4,%r8
 andq $0xf,%rdx
 movq %r9,%r10
 shrq $4,%r9
 xorq 8(%rsi,%rax,1),%r8
 shlq $60,%r10
 xorq (%rsi,%rax,1),%r9
 andb $0xf0,%bl
 xorq (%r11,%rdx,8),%r9
 movq %r8,%rdx
 xorq %r10,%r8

 shrq $4,%r8
 andq $0xf,%rdx
 movq %r9,%r10
 shrq $4,%r9
 xorq 8(%rsi,%rbx,1),%r8
 shlq $60,%r10
 xorq (%rsi,%rbx,1),%r9
 xorq %r10,%r8
 xorq (%r11,%rdx,8),%r9

 bswapq %r8
 bswapq %r9
 movq %r8,8(%rdi)
 movq %r9,(%rdi)

 leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
 movq -8(%rsi),%rbx
.cfi_restore %rbx
 leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
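# void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#
# Folds len bytes at inp (a multiple of 16) into Xi. The prologue builds a
# rearranged copy of Htable in the 280-byte stack frame; the main loop then
# consumes one block per iteration, using the byte-wide .Lrem_8bit reduction
# table instead of .Lrem_4bit.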
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
.cfi_startproc
 pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
 pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
 pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
 pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
 pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
 pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
 subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lghash_prologue:
 movq %rdx,%r14
 movq %rcx,%r15
 subq $-128,%rsi
 leaq 16+128(%rsp),%rbp
 xorl %edx,%edx
 movq 0+0-128(%rsi),%r8
 movq 0+8-128(%rsi),%rax
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq 16+0-128(%rsi),%r9
 shlb $4,%dl
 movq 16+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,0(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,0(%rbp)
 movq 32+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,0-128(%rbp)
 movq 32+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,1(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,8(%rbp)
 movq 48+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,8-128(%rbp)
 movq 48+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,2(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,16(%rbp)
 movq 64+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,16-128(%rbp)
 movq 64+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,3(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,24(%rbp)
 movq 80+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,24-128(%rbp)
 movq 80+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,4(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,32(%rbp)
 movq 96+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,32-128(%rbp)
 movq 96+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,5(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,40(%rbp)
 movq 112+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,40-128(%rbp)
 movq 112+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,6(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,48(%rbp)
 movq 128+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,48-128(%rbp)
 movq 128+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,7(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,56(%rbp)
 movq 144+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,56-128(%rbp)
 movq 144+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,8(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,64(%rbp)
 movq 160+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,64-128(%rbp)
 movq 160+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,9(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,72(%rbp)
 movq 176+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,72-128(%rbp)
 movq 176+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,10(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,80(%rbp)
 movq 192+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,80-128(%rbp)
 movq 192+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,11(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,88(%rbp)
 movq 208+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,88-128(%rbp)
 movq 208+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,12(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,96(%rbp)
 movq 224+0-128(%rsi),%r8
 shlb $4,%dl
 movq %rax,96-128(%rbp)
 movq 224+8-128(%rsi),%rax
 shlq $60,%r10
 movb %dl,13(%rsp)
 orq %r10,%rbx
 movb %al,%dl
 shrq $4,%rax
 movq %r8,%r10
 shrq $4,%r8
 movq %r9,104(%rbp)
 movq 240+0-128(%rsi),%r9
 shlb $4,%dl
 movq %rbx,104-128(%rbp)
 movq 240+8-128(%rsi),%rbx
 shlq $60,%r10
 movb %dl,14(%rsp)
 orq %r10,%rax
 movb %bl,%dl
 shrq $4,%rbx
 movq %r9,%r10
 shrq $4,%r9
 movq %r8,112(%rbp)
 shlb $4,%dl
 movq %rax,112-128(%rbp)
 shlq $60,%r10
 movb %dl,15(%rsp)
 orq %r10,%rbx
 movq %r9,120(%rbp)
 movq %rbx,120-128(%rbp)
 addq $-128,%rsi
 movq 8(%rdi),%r8
 movq 0(%rdi),%r9
 addq %r14,%r15
 leaq .Lrem_8bit(%rip),%r11
 jmp .Louter_loop
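# One 16-byte block is folded into Xi per iteration; %r14 walks the input
# and %r15 holds the end pointer computed in the prologue.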
.align 16
.Louter_loop:
 xorq (%r14),%r9
 movq 8(%r14),%rdx
 leaq 16(%r14),%r14
 xorq %r8,%rdx
 movq %r9,(%rdi)
 movq %rdx,8(%rdi)
 shrq $32,%rdx
 xorq %rax,%rax
 roll $8,%edx
 movb %dl,%al
 movzbl %dl,%ebx
 shlb $4,%al
 shrl $4,%ebx
 roll $8,%edx
 movq 8(%rsi,%rax,1),%r8
 movq (%rsi,%rax,1),%r9
 movb %dl,%al
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 xorq %r8,%r12
 movq %r9,%r10
 shrq $8,%r8
 movzbq %r12b,%r12
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 movl 8(%rdi),%edx
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 movl 4(%rdi),%edx
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 movl 0(%rdi),%edx
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 shrl $4,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r12,2),%r12
 movzbl %dl,%ebx
 shlb $4,%al
 movzbq (%rsp,%rcx,1),%r13
 shrl $4,%ebx
 shlq $48,%r12
 xorq %r8,%r13
 movq %r9,%r10
 xorq %r12,%r9
 shrq $8,%r8
 movzbq %r13b,%r13
 shrq $8,%r9
 xorq -128(%rbp,%rcx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rcx,8),%r9
 roll $8,%edx
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 movb %dl,%al
 xorq %r10,%r8
 movzwq (%r11,%r13,2),%r13
 movzbl %dl,%ecx
 shlb $4,%al
 movzbq (%rsp,%rbx,1),%r12
 andl $240,%ecx
 shlq $48,%r13
 xorq %r8,%r12
 movq %r9,%r10
 xorq %r13,%r9
 shrq $8,%r8
 movzbq %r12b,%r12
 movl -4(%rdi),%edx
 shrq $8,%r9
 xorq -128(%rbp,%rbx,8),%r8
 shlq $56,%r10
 xorq (%rbp,%rbx,8),%r9
 movzwq (%r11,%r12,2),%r12
 xorq 8(%rsi,%rax,1),%r8
 xorq (%rsi,%rax,1),%r9
 shlq $48,%r12
 xorq %r10,%r8
 xorq %r12,%r9
 movzbq %r8b,%r13
 shrq $4,%r8
 movq %r9,%r10
 shlb $4,%r13b
 shrq $4,%r9
 xorq 8(%rsi,%rcx,1),%r8
 movzwq (%r11,%r13,2),%r13
 shlq $60,%r10
 xorq (%rsi,%rcx,1),%r9
 xorq %r10,%r8
 shlq $48,%r13
 bswapq %r8
 xorq %r13,%r9
 bswapq %r9
 cmpq %r15,%r14
 jb .Louter_loop
 movq %r8,8(%rdi)
 movq %r9,(%rdi)

 leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
 movq -48(%rsi),%r15
.cfi_restore %r15
 movq -40(%rsi),%r14
.cfi_restore %r14
 movq -32(%rsi),%r13
.cfi_restore %r13
 movq -24(%rsi),%r12
.cfi_restore %r12
 movq -16(%rsi),%rbp
.cfi_restore %rbp
 movq -8(%rsi),%rbx
.cfi_restore %rbx
 leaq 0(%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lghash_epilogue:
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
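# void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
#
# PCLMULQDQ-based table setup: shifts H left by one bit (folding the
# carried-out top bit back in with .L0x1c2_polynomial), then precomputes
# H, H^2, H^3 and H^4 plus the combined halves used by the Karatsuba-style
# multiplications. The .byte 102,15,58,68,... sequences are pclmulqdq
# instructions, and .byte 102,15,58,15,... is palignr, emitted as raw
# opcodes for compatibility with old assemblers.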
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
 movdqu (%rsi),%xmm2
 pshufd $78,%xmm2,%xmm2


 pshufd $255,%xmm2,%xmm4
 movdqa %xmm2,%xmm3
 psllq $1,%xmm2
 pxor %xmm5,%xmm5
 psrlq $63,%xmm3
 pcmpgtd %xmm4,%xmm5
 pslldq $8,%xmm3
 por %xmm3,%xmm2


 pand .L0x1c2_polynomial(%rip),%xmm5
 pxor %xmm5,%xmm2


 pshufd $78,%xmm2,%xmm6
 movdqa %xmm2,%xmm0
 pxor %xmm2,%xmm6
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm3
 pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
 pxor %xmm0,%xmm3
 pxor %xmm1,%xmm3

 movdqa %xmm3,%xmm4
 psrldq $8,%xmm3
 pslldq $8,%xmm4
 pxor %xmm3,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1


 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 pshufd $78,%xmm2,%xmm3
 pshufd $78,%xmm0,%xmm4
 pxor %xmm2,%xmm3
 movdqu %xmm2,0(%rdi)
 pxor %xmm0,%xmm4
 movdqu %xmm0,16(%rdi)
.byte 102,15,58,15,227,8
 movdqu %xmm4,32(%rdi)
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm3
 pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
 pxor %xmm0,%xmm3
 pxor %xmm1,%xmm3

 movdqa %xmm3,%xmm4
 psrldq $8,%xmm3
 pslldq $8,%xmm4
 pxor %xmm3,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1


 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 movdqa %xmm0,%xmm5
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm3
 pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
 pxor %xmm0,%xmm3
 pxor %xmm1,%xmm3

 movdqa %xmm3,%xmm4
 psrldq $8,%xmm3
 pslldq $8,%xmm4
 pxor %xmm3,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1


 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 pshufd $78,%xmm5,%xmm3
 pshufd $78,%xmm0,%xmm4
 pxor %xmm5,%xmm3
 movdqu %xmm5,48(%rdi)
 pxor %xmm0,%xmm4
 movdqu %xmm0,64(%rdi)
.byte 102,15,58,15,227,8
 movdqu %xmm4,80(%rdi)
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_clmul,.-gcm_init_clmul
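# void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
#
# Single-block GHASH multiply via carry-less multiplication. The
# .byte 102,15,56,0,... sequences are pshufb with .Lbswap_mask, converting
# Xi between memory byte order and the order used by the multiply.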
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
 movdqu (%rdi),%xmm0
 movdqa .Lbswap_mask(%rip),%xmm5
 movdqu (%rsi),%xmm2
 movdqu 32(%rsi),%xmm4
.byte 102,15,56,0,197
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm3
 pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
 pxor %xmm0,%xmm3
 pxor %xmm1,%xmm3

 movdqa %xmm3,%xmm4
 psrldq $8,%xmm3
 pslldq $8,%xmm4
 pxor %xmm3,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1


 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
.byte 102,15,56,0,197
 movdqu %xmm0,(%rdi)
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
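# void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#
# Folds len bytes at inp (%rdx) into Xi (%rdi). When at least four blocks
# remain it takes the 4-blocks-per-iteration .Lmod4_loop; the
# OPENSSL_ia32cap_P test below skips that path on cores advertising MOVBE
# but not XSAVE (Atom Silvermont), where the aggregated loop does not pay off.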
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
 movdqa .Lbswap_mask(%rip),%xmm10

 movdqu (%rdi),%xmm0
 movdqu (%rsi),%xmm2
 movdqu 32(%rsi),%xmm7
.byte 102,65,15,56,0,194

 subq $0x10,%rcx
 jz .Lodd_tail

 movdqu 16(%rsi),%xmm6
 leaq OPENSSL_ia32cap_P(%rip),%rax
 movl 4(%rax),%eax
 cmpq $0x30,%rcx
 jb .Lskip4x

 andl $71303168,%eax
 cmpl $4194304,%eax
 je .Lskip4x

 subq $0x30,%rcx
 movq $0xA040608020C0E000,%rax
 movdqu 48(%rsi),%xmm14
 movdqu 64(%rsi),%xmm15




 movdqu 48(%rdx),%xmm3
 movdqu 32(%rdx),%xmm11
.byte 102,65,15,56,0,218
.byte 102,69,15,56,0,218
 movdqa %xmm3,%xmm5
 pshufd $78,%xmm3,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

 movdqa %xmm11,%xmm13
 pshufd $78,%xmm11,%xmm12
 pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
.byte 102,68,15,58,68,231,16
 xorps %xmm11,%xmm3
 xorps %xmm13,%xmm5
 movups 80(%rsi),%xmm7
 xorps %xmm12,%xmm4

 movdqu 16(%rdx),%xmm11
 movdqu 0(%rdx),%xmm8
.byte 102,69,15,56,0,218
.byte 102,69,15,56,0,194
 movdqa %xmm11,%xmm13
 pshufd $78,%xmm11,%xmm12
 pxor %xmm8,%xmm0
 pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm8
 pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
.byte 102,68,15,58,68,231,0
 xorps %xmm11,%xmm3
 xorps %xmm13,%xmm5

 leaq 64(%rdx),%rdx
 subq $0x40,%rcx
 jc .Ltail4x

 jmp .Lmod4_loop
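# Main 4x loop: the reduction of the previous 256-bit product is interleaved
# with the Karatsuba multiplications for the next four input blocks.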
.align 32
.Lmod4_loop:
.byte 102,65,15,58,68,199,0
 xorps %xmm12,%xmm4
 movdqu 48(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,65,15,58,68,207,17
 xorps %xmm3,%xmm0
 movdqu 32(%rdx),%xmm3
 movdqa %xmm11,%xmm13
.byte 102,68,15,58,68,199,16
 pshufd $78,%xmm11,%xmm12
 xorps %xmm5,%xmm1
 pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
 movups 32(%rsi),%xmm7
 xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
 pshufd $78,%xmm3,%xmm4

 pxor %xmm0,%xmm8
 movdqa %xmm3,%xmm5
 pxor %xmm1,%xmm8
 pxor %xmm3,%xmm4
 movdqa %xmm8,%xmm9
.byte 102,68,15,58,68,234,17
 pslldq $8,%xmm8
 psrldq $8,%xmm9
 pxor %xmm8,%xmm0
 movdqa .L7_mask(%rip),%xmm8
 pxor %xmm9,%xmm1
.byte 102,76,15,110,200

 pand %xmm0,%xmm8
.byte 102,69,15,56,0,200
 pxor %xmm0,%xmm9
.byte 102,68,15,58,68,231,0
 psllq $57,%xmm9
 movdqa %xmm9,%xmm8
 pslldq $8,%xmm9
.byte 102,15,58,68,222,0
 psrldq $8,%xmm8
 pxor %xmm9,%xmm0
 pxor %xmm8,%xmm1
 movdqu 0(%rdx),%xmm8

 movdqa %xmm0,%xmm9
 psrlq $1,%xmm0
.byte 102,15,58,68,238,17
 xorps %xmm11,%xmm3
 movdqu 16(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,15,58,68,231,16
 xorps %xmm13,%xmm5
 movups 80(%rsi),%xmm7
.byte 102,69,15,56,0,194
 pxor %xmm9,%xmm1
 pxor %xmm0,%xmm9
 psrlq $5,%xmm0

 movdqa %xmm11,%xmm13
 pxor %xmm12,%xmm4
 pshufd $78,%xmm11,%xmm12
 pxor %xmm9,%xmm0
 pxor %xmm8,%xmm1
 pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
 xorps %xmm11,%xmm3
 pshufd $78,%xmm0,%xmm8
 pxor %xmm0,%xmm8

.byte 102,68,15,58,68,231,0
 xorps %xmm13,%xmm5

 leaq 64(%rdx),%rdx
 subq $0x40,%rcx
 jnc .Lmod4_loop

.Ltail4x:
.byte 102,65,15,58,68,199,0
.byte 102,65,15,58,68,207,17
.byte 102,68,15,58,68,199,16
 xorps %xmm12,%xmm4
 xorps %xmm3,%xmm0
 xorps %xmm5,%xmm1
 pxor %xmm0,%xmm1
 pxor %xmm4,%xmm8

 pxor %xmm1,%xmm8
 pxor %xmm0,%xmm1

 movdqa %xmm8,%xmm9
 psrldq $8,%xmm8
 pslldq $8,%xmm9
 pxor %xmm8,%xmm1
 pxor %xmm9,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1


 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 addq $0x40,%rcx
 jz .Ldone
 movdqu 32(%rsi),%xmm7
 subq $0x10,%rcx
 jz .Lodd_tail
.Lskip4x:





 movdqu (%rdx),%xmm8
 movdqu 16(%rdx),%xmm3
.byte 102,69,15,56,0,194
.byte 102,65,15,56,0,218
 pxor %xmm8,%xmm0

 movdqa %xmm3,%xmm5
 pshufd $78,%xmm3,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

 leaq 32(%rdx),%rdx
 nop
 subq $0x20,%rcx
 jbe .Leven_tail
 nop
 jmp .Lmod_loop

.align 32
.Lmod_loop:
 movdqa %xmm0,%xmm1
 movdqa %xmm4,%xmm8
 pshufd $78,%xmm0,%xmm4
 pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

 pxor %xmm3,%xmm0
 pxor %xmm5,%xmm1
 movdqu (%rdx),%xmm9
 pxor %xmm0,%xmm8
.byte 102,69,15,56,0,202
 movdqu 16(%rdx),%xmm3

 pxor %xmm1,%xmm8
 pxor %xmm9,%xmm1
 pxor %xmm8,%xmm4
.byte 102,65,15,56,0,218
 movdqa %xmm4,%xmm8
 psrldq $8,%xmm8
 pslldq $8,%xmm4
 pxor %xmm8,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm3,%xmm5

 movdqa %xmm0,%xmm9
 movdqa %xmm0,%xmm8
 psllq $5,%xmm0
 pxor %xmm0,%xmm8
.byte 102,15,58,68,218,0
 psllq $1,%xmm0
 pxor %xmm8,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm8
 pslldq $8,%xmm0
 psrldq $8,%xmm8
 pxor %xmm9,%xmm0
 pshufd $78,%xmm5,%xmm4
 pxor %xmm8,%xmm1
 pxor %xmm5,%xmm4

 movdqa %xmm0,%xmm9
 psrlq $1,%xmm0
.byte 102,15,58,68,234,17
 pxor %xmm9,%xmm1
 pxor %xmm0,%xmm9
 psrlq $5,%xmm0
 pxor %xmm9,%xmm0
 leaq 32(%rdx),%rdx
 psrlq $1,%xmm0
.byte 102,15,58,68,231,0
 pxor %xmm1,%xmm0

 subq $0x20,%rcx
 ja .Lmod_loop

.Leven_tail:
 movdqa %xmm0,%xmm1
 movdqa %xmm4,%xmm8
 pshufd $78,%xmm0,%xmm4
 pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

 pxor %xmm3,%xmm0
 pxor %xmm5,%xmm1
 pxor %xmm0,%xmm8
 pxor %xmm1,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm8
 psrldq $8,%xmm8
 pslldq $8,%xmm4
 pxor %xmm8,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1


 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
 testq %rcx,%rcx
 jnz .Ldone

.Lodd_tail:
 movdqu (%rdx),%xmm8
.byte 102,69,15,56,0,194
 pxor %xmm8,%xmm0
 movdqa %xmm0,%xmm1
 pshufd $78,%xmm0,%xmm3
 pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,223,0
 pxor %xmm0,%xmm3
 pxor %xmm1,%xmm3

 movdqa %xmm3,%xmm4
 psrldq $8,%xmm3
 pslldq $8,%xmm4
 pxor %xmm3,%xmm1
 pxor %xmm4,%xmm0

 movdqa %xmm0,%xmm4
 movdqa %xmm0,%xmm3
 psllq $5,%xmm0
 pxor %xmm0,%xmm3
 psllq $1,%xmm0
 pxor %xmm3,%xmm0
 psllq $57,%xmm0
 movdqa %xmm0,%xmm3
 pslldq $8,%xmm0
 psrldq $8,%xmm3
 pxor %xmm4,%xmm0
 pxor %xmm3,%xmm1


 movdqa %xmm0,%xmm4
 psrlq $1,%xmm0
 pxor %xmm4,%xmm1
 pxor %xmm0,%xmm4
 psrlq $5,%xmm0
 pxor %xmm4,%xmm0
 psrlq $1,%xmm0
 pxor %xmm1,%xmm0
.Ldone:
.byte 102,65,15,56,0,194
 movdqu %xmm0,(%rdi)
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_clmul,.-gcm_ghash_clmul
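# void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
#
# AVX table setup: the same preprocessing of H as gcm_init_clmul, then four
# passes of .Linit_loop_avx store H^1 through H^8 together with the packed
# xor-of-halves values consumed by gcm_ghash_avx.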
.globl gcm_init_avx
.hidden gcm_init_avx
.type gcm_init_avx,@function
.align 32
gcm_init_avx:
.cfi_startproc
 vzeroupper

 vmovdqu (%rsi),%xmm2
 vpshufd $78,%xmm2,%xmm2


 vpshufd $255,%xmm2,%xmm4
 vpsrlq $63,%xmm2,%xmm3
 vpsllq $1,%xmm2,%xmm2
 vpxor %xmm5,%xmm5,%xmm5
 vpcmpgtd %xmm4,%xmm5,%xmm5
 vpslldq $8,%xmm3,%xmm3
 vpor %xmm3,%xmm2,%xmm2


 vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
 vpxor %xmm5,%xmm2,%xmm2

 vpunpckhqdq %xmm2,%xmm2,%xmm6
 vmovdqa %xmm2,%xmm0
 vpxor %xmm2,%xmm6,%xmm6
 movq $4,%r10
 jmp .Linit_start_avx
.align 32
.Linit_loop_avx:
 vpalignr $8,%xmm3,%xmm4,%xmm5
 vmovdqu %xmm5,-16(%rdi)
 vpunpckhqdq %xmm0,%xmm0,%xmm3
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
 vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
 vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
 vpxor %xmm0,%xmm1,%xmm4
 vpxor %xmm4,%xmm3,%xmm3

 vpslldq $8,%xmm3,%xmm4
 vpsrldq $8,%xmm3,%xmm3
 vpxor %xmm4,%xmm0,%xmm0
 vpxor %xmm3,%xmm1,%xmm1
 vpsllq $57,%xmm0,%xmm3
 vpsllq $62,%xmm0,%xmm4
 vpxor %xmm3,%xmm4,%xmm4
 vpsllq $63,%xmm0,%xmm3
 vpxor %xmm3,%xmm4,%xmm4
 vpslldq $8,%xmm4,%xmm3
 vpsrldq $8,%xmm4,%xmm4
 vpxor %xmm3,%xmm0,%xmm0
 vpxor %xmm4,%xmm1,%xmm1

 vpsrlq $1,%xmm0,%xmm4
 vpxor %xmm0,%xmm1,%xmm1
 vpxor %xmm4,%xmm0,%xmm0
 vpsrlq $5,%xmm4,%xmm4
 vpxor %xmm4,%xmm0,%xmm0
 vpsrlq $1,%xmm0,%xmm0
 vpxor %xmm1,%xmm0,%xmm0
.Linit_start_avx:
 vmovdqa %xmm0,%xmm5
 vpunpckhqdq %xmm0,%xmm0,%xmm3
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
 vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
 vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
 vpxor %xmm0,%xmm1,%xmm4
 vpxor %xmm4,%xmm3,%xmm3

 vpslldq $8,%xmm3,%xmm4
 vpsrldq $8,%xmm3,%xmm3
 vpxor %xmm4,%xmm0,%xmm0
 vpxor %xmm3,%xmm1,%xmm1
 vpsllq $57,%xmm0,%xmm3
 vpsllq $62,%xmm0,%xmm4
 vpxor %xmm3,%xmm4,%xmm4
 vpsllq $63,%xmm0,%xmm3
 vpxor %xmm3,%xmm4,%xmm4
 vpslldq $8,%xmm4,%xmm3
 vpsrldq $8,%xmm4,%xmm4
 vpxor %xmm3,%xmm0,%xmm0
 vpxor %xmm4,%xmm1,%xmm1

 vpsrlq $1,%xmm0,%xmm4
 vpxor %xmm0,%xmm1,%xmm1
 vpxor %xmm4,%xmm0,%xmm0
 vpsrlq $5,%xmm4,%xmm4
 vpxor %xmm4,%xmm0,%xmm0
 vpsrlq $1,%xmm0,%xmm0
 vpxor %xmm1,%xmm0,%xmm0
 vpshufd $78,%xmm5,%xmm3
 vpshufd $78,%xmm0,%xmm4
 vpxor %xmm5,%xmm3,%xmm3
 vmovdqu %xmm5,0(%rdi)
 vpxor %xmm0,%xmm4,%xmm4
 vmovdqu %xmm0,16(%rdi)
 leaq 48(%rdi),%rdi
 subq $1,%r10
 jnz .Linit_loop_avx

 vpalignr $8,%xmm4,%xmm3,%xmm5
 vmovdqu %xmm5,-16(%rdi)

 vzeroupper
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_avx,.-gcm_init_avx
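# gcm_gmult_avx is an alias for the CLMUL single-block path above.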
.globl gcm_gmult_avx
.hidden gcm_gmult_avx
.type gcm_gmult_avx,@function
.align 32
gcm_gmult_avx:
.cfi_startproc
 jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
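# void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#
# AVX version of gcm_ghash_clmul: inputs of 0x80 bytes or more go through
# the 8-blocks-per-iteration .Loop8x_avx, with .Lshort_avx and the tail
# code handling anything shorter.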
.globl gcm_ghash_avx
.hidden gcm_ghash_avx
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
.cfi_startproc
 vzeroupper

 vmovdqu (%rdi),%xmm10
 leaq .L0x1c2_polynomial(%rip),%r10
 leaq 64(%rsi),%rsi
 vmovdqu .Lbswap_mask(%rip),%xmm13
 vpshufb %xmm13,%xmm10,%xmm10
 cmpq $0x80,%rcx
 jb .Lshort_avx
 subq $0x80,%rcx

 vmovdqu 112(%rdx),%xmm14
 vmovdqu 0-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm14
 vmovdqu 32-64(%rsi),%xmm7

 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vmovdqu 96(%rdx),%xmm15
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpxor %xmm14,%xmm9,%xmm9
 vpshufb %xmm13,%xmm15,%xmm15
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 16-64(%rsi),%xmm6
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vmovdqu 80(%rdx),%xmm14
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vpxor %xmm15,%xmm8,%xmm8

 vpshufb %xmm13,%xmm14,%xmm14
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vmovdqu 48-64(%rsi),%xmm6
 vpxor %xmm14,%xmm9,%xmm9
 vmovdqu 64(%rdx),%xmm15
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 80-64(%rsi),%xmm7

 vpshufb %xmm13,%xmm15,%xmm15
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpxor %xmm1,%xmm4,%xmm4
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 64-64(%rsi),%xmm6
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vpxor %xmm15,%xmm8,%xmm8

 vmovdqu 48(%rdx),%xmm14
 vpxor %xmm3,%xmm0,%xmm0
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpxor %xmm4,%xmm1,%xmm1
 vpshufb %xmm13,%xmm14,%xmm14
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vmovdqu 96-64(%rsi),%xmm6
 vpxor %xmm5,%xmm2,%xmm2
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 128-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9

 vmovdqu 32(%rdx),%xmm15
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpxor %xmm1,%xmm4,%xmm4
 vpshufb %xmm13,%xmm15,%xmm15
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 112-64(%rsi),%xmm6
 vpxor %xmm2,%xmm5,%xmm5
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vpxor %xmm15,%xmm8,%xmm8

 vmovdqu 16(%rdx),%xmm14
 vpxor %xmm3,%xmm0,%xmm0
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpxor %xmm4,%xmm1,%xmm1
 vpshufb %xmm13,%xmm14,%xmm14
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vmovdqu 144-64(%rsi),%xmm6
 vpxor %xmm5,%xmm2,%xmm2
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 176-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9

 vmovdqu (%rdx),%xmm15
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpxor %xmm1,%xmm4,%xmm4
 vpshufb %xmm13,%xmm15,%xmm15
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 160-64(%rsi),%xmm6
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2

 leaq 128(%rdx),%rdx
 cmpq $0x80,%rcx
 jb .Ltail_avx

 vpxor %xmm10,%xmm15,%xmm15
 subq $0x80,%rcx
 jmp .Loop8x_avx

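# Eight blocks per iteration: each block is multiplied by the matching power
# of H from the table built by gcm_init_avx, the partial products are
# accumulated, and the previous result is reduced along the way using the
# .L0x1c2_polynomial constant at (%r10).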
.align 32
.Loop8x_avx:
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vmovdqu 112(%rdx),%xmm14
 vpxor %xmm0,%xmm3,%xmm3
 vpxor %xmm15,%xmm8,%xmm8
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
 vpshufb %xmm13,%xmm14,%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
 vmovdqu 0-64(%rsi),%xmm6
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
 vmovdqu 32-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9

 vmovdqu 96(%rdx),%xmm15
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpxor %xmm3,%xmm10,%xmm10
 vpshufb %xmm13,%xmm15,%xmm15
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vxorps %xmm4,%xmm11,%xmm11
 vmovdqu 16-64(%rsi),%xmm6
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vpxor %xmm5,%xmm12,%xmm12
 vxorps %xmm15,%xmm8,%xmm8

 vmovdqu 80(%rdx),%xmm14
 vpxor %xmm10,%xmm12,%xmm12
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpxor %xmm11,%xmm12,%xmm12
 vpslldq $8,%xmm12,%xmm9
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vpsrldq $8,%xmm12,%xmm12
 vpxor %xmm9,%xmm10,%xmm10
 vmovdqu 48-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm14
 vxorps %xmm12,%xmm11,%xmm11
 vpxor %xmm1,%xmm4,%xmm4
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 80-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9
 vpxor %xmm2,%xmm5,%xmm5

 vmovdqu 64(%rdx),%xmm15
 vpalignr $8,%xmm10,%xmm10,%xmm12
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpshufb %xmm13,%xmm15,%xmm15
 vpxor %xmm3,%xmm0,%xmm0
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 64-64(%rsi),%xmm6
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm4,%xmm1,%xmm1
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vxorps %xmm15,%xmm8,%xmm8
 vpxor %xmm5,%xmm2,%xmm2

 vmovdqu 48(%rdx),%xmm14
 vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpshufb %xmm13,%xmm14,%xmm14
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vmovdqu 96-64(%rsi),%xmm6
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 128-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9
 vpxor %xmm2,%xmm5,%xmm5

 vmovdqu 32(%rdx),%xmm15
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpshufb %xmm13,%xmm15,%xmm15
 vpxor %xmm3,%xmm0,%xmm0
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 112-64(%rsi),%xmm6
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm4,%xmm1,%xmm1
 vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
 vpxor %xmm15,%xmm8,%xmm8
 vpxor %xmm5,%xmm2,%xmm2
 vxorps %xmm12,%xmm10,%xmm10

 vmovdqu 16(%rdx),%xmm14
 vpalignr $8,%xmm10,%xmm10,%xmm12
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
 vpshufb %xmm13,%xmm14,%xmm14
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
 vmovdqu 144-64(%rsi),%xmm6
 vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
 vxorps %xmm11,%xmm12,%xmm12
 vpunpckhqdq %xmm14,%xmm14,%xmm9
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
 vmovdqu 176-64(%rsi),%xmm7
 vpxor %xmm14,%xmm9,%xmm9
 vpxor %xmm2,%xmm5,%xmm5

 vmovdqu (%rdx),%xmm15
 vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
 vpshufb %xmm13,%xmm15,%xmm15
 vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
 vmovdqu 160-64(%rsi),%xmm6
 vpxor %xmm12,%xmm15,%xmm15
 vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
 vpxor %xmm10,%xmm15,%xmm15

 leaq 128(%rdx),%rdx
 subq $0x80,%rcx
 jnc .Loop8x_avx

 addq $0x80,%rcx
 jmp .Ltail_no_xor_avx

.align 32
.Lshort_avx:
 vmovdqu -16(%rdx,%rcx,1),%xmm14
 leaq (%rdx,%rcx,1),%rdx
 vmovdqu 0-64(%rsi),%xmm6
 vmovdqu 32-64(%rsi),%xmm7
 vpshufb %xmm13,%xmm14,%xmm15

 vmovdqa %xmm0,%xmm3
 vmovdqa %xmm1,%xmm4
 vmovdqa %xmm2,%xmm5
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -32(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 16-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vpsrldq $8,%xmm7,%xmm7
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -48(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 48-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vmovdqu 80-64(%rsi),%xmm7
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -64(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 64-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vpsrldq $8,%xmm7,%xmm7
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -80(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 96-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vmovdqu 128-64(%rsi),%xmm7
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -96(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 112-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vpsrldq $8,%xmm7,%xmm7
 subq $0x10,%rcx
 jz .Ltail_avx

 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vmovdqu -112(%rdx),%xmm14
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vmovdqu 144-64(%rsi),%xmm6
 vpshufb %xmm13,%xmm14,%xmm15
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
 vmovq 184-64(%rsi),%xmm7
 subq $0x10,%rcx
 jmp .Ltail_avx

.align 32
.Ltail_avx:
 vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
 vpunpckhqdq %xmm15,%xmm15,%xmm8
 vpxor %xmm0,%xmm3,%xmm3
 vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
 vpxor %xmm15,%xmm8,%xmm8
 vpxor %xmm1,%xmm4,%xmm4
 vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
 vpxor %xmm2,%xmm5,%xmm5
 vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2

 vmovdqu (%r10),%xmm12

 vpxor %xmm0,%xmm3,%xmm10
 vpxor %xmm1,%xmm4,%xmm11
 vpxor %xmm2,%xmm5,%xmm5

 vpxor %xmm10,%xmm5,%xmm5
 vpxor %xmm11,%xmm5,%xmm5
 vpslldq $8,%xmm5,%xmm9
 vpsrldq $8,%xmm5,%xmm5
 vpxor %xmm9,%xmm10,%xmm10
 vpxor %xmm5,%xmm11,%xmm11

 vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
 vpalignr $8,%xmm10,%xmm10,%xmm10
 vpxor %xmm9,%xmm10,%xmm10

 vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
 vpalignr $8,%xmm10,%xmm10,%xmm10
 vpxor %xmm11,%xmm10,%xmm10
 vpxor %xmm9,%xmm10,%xmm10

 cmpq $0,%rcx
 jne .Lshort_avx

 vpshufb %xmm13,%xmm10,%xmm10
 vmovdqu %xmm10,(%rdi)
 vzeroupper
 .byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
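# Constants: .Lbswap_mask reverses byte order for the 128-bit loads/stores,
# .L0x1c2_polynomial is the GHASH reduction constant, .L7_mask feeds the
# reduction in .Lmod4_loop, and .Lrem_4bit / .Lrem_8bit are the remainder
# tables for the table-driven implementations.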
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
.align 64
.type .Lrem_4bit,@object
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type .Lrem_8bit,@object
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

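# The .byte sequence below spells "GHASH for x86_64, CRYPTOGAMS by
# <appro@openssl.org>", the CRYPTOGAMS attribution string.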
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif