1 | # This file is generated from a similarly-named Perl script in the BoringSSL |
2 | # source tree. Do not edit by hand. |
3 | |
4 | #if defined(__has_feature) |
5 | #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) |
6 | #define OPENSSL_NO_ASM |
7 | #endif |
8 | #endif |
9 | |
10 | #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) |
11 | #if defined(BORINGSSL_PREFIX) |
12 | #include <boringssl_prefix_symbols_asm.h> |
13 | #endif |
14 | .text |
15 | .extern OPENSSL_ia32cap_P |
16 | .hidden OPENSSL_ia32cap_P |
17 | |
/*
 * void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16])
 *
 * GHASH single-block multiply, 4-bit table-driven ("Shoup") method.
 * Multiplies the 128-bit field element Xi (big-endian bytes at %rdi)
 * by H in GF(2^128), using the caller-prebuilt 256-byte Htable (%rsi)
 * holding the 16 nibble multiples of H, and writes the product back
 * to Xi.  .Lrem_4bit (defined elsewhere in this file) is the 16-entry
 * reduction table for the bits shifted out on each 4-bit step.
 *
 * SysV AMD64: in %rdi = Xi, %rsi = Htable; no return value.
 * Only %rbx is actually clobbered among callee-saved registers; the
 * remaining pushes keep the frame layout identical to gcm_ghash_4bit
 * (shared prologue/epilogue shape for the Win64 SEH handler), which is
 * why the epilogue restores %rbx alone.
 */
.globl gcm_gmult_4bit
.hidden gcm_gmult_4bit
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lgmult_prologue:

/* Prime the accumulator from the last byte of Xi: %al holds the low
   nibble scaled to a table offset (<<4 because entries are 16 bytes),
   %bl the high nibble; %rcx counts the remaining bytes 14..0. */
movzbq 15(%rdi),%r8
leaq .Lrem_4bit(%rip),%r11
xorq %rax,%rax
xorq %rbx,%rbx
movb %r8b,%al
movb %r8b,%bl
shlb $4,%al
movq $14,%rcx
movq 8(%rsi,%rax,1),%r8
movq (%rsi,%rax,1),%r9
andb $0xf0,%bl
movq %r8,%rdx
jmp .Loop1

/* Per-byte loop: two 4-bit shift-and-reduce steps per iteration.
   %r9:%r8 is the running product; after each >>4 the 4 bits shifted
   out (saved in %rdx) select a .Lrem_4bit reduction constant that is
   folded into the high half.  First step consumes the low nibble
   (offset in %rax), second the high nibble (offset in %rbx). */
.align 16
.Loop1:
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
movb (%rdi,%rcx,1),%al
shrq $4,%r9
xorq 8(%rsi,%rbx,1),%r8
shlq $60,%r10
xorq (%rsi,%rbx,1),%r9
movb %al,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
shlb $4,%al
xorq %r10,%r8
decq %rcx
js .Lbreak1

shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rax,1),%r8
shlq $60,%r10
xorq (%rsi,%rax,1),%r9
andb $0xf0,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
xorq %r10,%r8
jmp .Loop1

/* Tail: finish the two nibble steps for the final (0th) byte. */
.align 16
.Lbreak1:
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rax,1),%r8
shlq $60,%r10
xorq (%rsi,%rax,1),%r9
andb $0xf0,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
xorq %r10,%r8

shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rbx,1),%r8
shlq $60,%r10
xorq (%rsi,%rbx,1),%r9
xorq %r10,%r8
xorq (%r11,%rdx,8),%r9

/* Store the product back to Xi in big-endian byte order. */
bswapq %r8
bswapq %r9
movq %r8,8(%rdi)
movq %r9,(%rdi)

/* Epilogue: 280+48 skips the scratch area plus the six saved regs;
   only %rbx was modified, so only it is reloaded (see header note). */
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
/*
 * void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
 *                     const uint8_t *inp, size_t len)
 *
 * GHASH over a whole buffer: Xi = (Xi ^ inp[0..15]) * H, repeated for
 * each 16-byte block, using an 8-bit table-driven variant.  len is
 * assumed to be a multiple of 16 (caller contract in BoringSSL).
 *
 * SysV AMD64: %rdi = Xi, %rsi = Htable, %rdx = inp, %rcx = len.
 * On entry the function rebuilds, on the 280-byte stack frame, a
 * "shifted by 4 bits" copy of Htable (around %rbp = %rsp+16+128) plus
 * a 16-byte nibble table at (%rsp), so the main loop can consume the
 * input 8 bits at a time against the .Lrem_8bit reduction table
 * (defined elsewhere in this file).
 */
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lghash_prologue:
/* %r14 = input pointer, %r15 = length (becomes end pointer below);
   %rsi is biased by +128 so Htable entries are reachable with
   disp8-style "N-128" offsets throughout the precompute. */
movq %rdx,%r14
movq %rcx,%r15
subq $-128,%rsi
leaq 16+128(%rsp),%rbp
xorl %edx,%edx
/* Unrolled x8 precompute: for each of the 16 Htable entries, store
   Hi>>4 at (%rbp) / Lo>>4 (with carried-in bits) at -128(%rbp), and
   the low nibble of Lo, pre-shifted <<4, into the byte table at
   (%rsp).  Loads of entry k+1 are interleaved with stores of k. */
movq 0+0-128(%rsi),%r8
movq 0+8-128(%rsi),%rax
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq 16+0-128(%rsi),%r9
shlb $4,%dl
movq 16+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,0(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,0(%rbp)
movq 32+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,0-128(%rbp)
movq 32+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,1(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,8(%rbp)
movq 48+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,8-128(%rbp)
movq 48+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,2(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,16(%rbp)
movq 64+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,16-128(%rbp)
movq 64+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,3(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,24(%rbp)
movq 80+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,24-128(%rbp)
movq 80+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,4(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,32(%rbp)
movq 96+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,32-128(%rbp)
movq 96+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,5(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,40(%rbp)
movq 112+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,40-128(%rbp)
movq 112+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,6(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,48(%rbp)
movq 128+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,48-128(%rbp)
movq 128+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,7(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,56(%rbp)
movq 144+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,56-128(%rbp)
movq 144+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,8(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,64(%rbp)
movq 160+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,64-128(%rbp)
movq 160+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,9(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,72(%rbp)
movq 176+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,72-128(%rbp)
movq 176+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,10(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,80(%rbp)
movq 192+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,80-128(%rbp)
movq 192+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,11(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,88(%rbp)
movq 208+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,88-128(%rbp)
movq 208+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,12(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,96(%rbp)
movq 224+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,96-128(%rbp)
movq 224+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,13(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,104(%rbp)
movq 240+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,104-128(%rbp)
movq 240+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,14(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,112(%rbp)
shlb $4,%dl
movq %rax,112-128(%rbp)
shlq $60,%r10
movb %dl,15(%rsp)
orq %r10,%rbx
movq %r9,120(%rbp)
movq %rbx,120-128(%rbp)
/* Un-bias %rsi, load current Xi into %r9:%r8, compute the end-of-input
   pointer in %r15, and point %r11 at the 8-bit reduction table. */
addq $-128,%rsi
movq 8(%rdi),%r8
movq 0(%rdi),%r9
addq %r14,%r15
leaq .Lrem_8bit(%rip),%r11
jmp .Louter_loop
/* Outer loop: one 16-byte input block per iteration.  Xi ^= block is
   spilled through Xi's own storage at (%rdi) so the byte-at-a-time
   inner sequence can reload 32-bit chunks of it via %edx.  Each 8-bit
   step does two nibble lookups (Htable and the shifted copy at %rbp)
   and folds the shifted-out byte through .Lrem_8bit via %r12/%r13. */
.align 16
.Louter_loop:
xorq (%r14),%r9
movq 8(%r14),%rdx
leaq 16(%r14),%r14
xorq %r8,%rdx
movq %r9,(%rdi)
movq %rdx,8(%rdi)
shrq $32,%rdx
xorq %rax,%rax
roll $8,%edx
movb %dl,%al
movzbl %dl,%ebx
shlb $4,%al
shrl $4,%ebx
roll $8,%edx
movq 8(%rsi,%rax,1),%r8
movq (%rsi,%rax,1),%r9
movb %dl,%al
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
xorq %r8,%r12
movq %r9,%r10
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 8(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 4(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 0(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
andl $240,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl -4(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
movzwq (%r11,%r12,2),%r12
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
shlq $48,%r12
xorq %r10,%r8
xorq %r12,%r9
movzbq %r8b,%r13
shrq $4,%r8
movq %r9,%r10
shlb $4,%r13b
shrq $4,%r9
xorq 8(%rsi,%rcx,1),%r8
movzwq (%r11,%r13,2),%r13
shlq $60,%r10
xorq (%rsi,%rcx,1),%r9
xorq %r10,%r8
shlq $48,%r13
bswapq %r8
xorq %r13,%r9
bswapq %r9
cmpq %r15,%r14
jb .Louter_loop
/* All blocks consumed: store the final big-endian Xi. */
movq %r8,8(%rdi)
movq %r9,(%rdi)

/* Epilogue: restore all six callee-saved registers saved above. */
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq 0(%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lghash_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
/*
 * void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2])
 *
 * PCLMULQDQ-based key setup: from the raw hash key H (%rsi) compute
 * H <<1 reduced mod the GHASH polynomial, then the powers H, H^2, H^3,
 * H^4 together with their "Karatsuba pre-folded" halves, and store
 * them at %rdi for use by gcm_gmult_clmul/gcm_ghash_clmul.
 *
 * The .byte 102,15,58,68,... sequences are pclmulqdq instructions
 * (emitted as raw bytes by the perlasm generator for old assemblers);
 * .byte 102,15,58,15,227,8 is palignr $8.  .L0x1c2_polynomial is the
 * reduction constant defined elsewhere in this file.
 * SysV AMD64; no callee-saved registers are touched (leaf function).
 */
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
movdqu (%rsi),%xmm2
pshufd $78,%xmm2,%xmm2

/* H = H<<1 mod P: the pcmpgtd produces an all-ones mask from the sign
   bit, which selects the 0x1c2 polynomial to fold back in. */
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2

/* Conditionally reduce by the GHASH polynomial. */
pand .L0x1c2_polynomial(%rip),%xmm5
pxor %xmm5,%xmm2

/* Square H to get H^2 (carry-less Karatsuba multiply + reduction). */
pshufd $78,%xmm2,%xmm6
movdqa %xmm2,%xmm0
pxor %xmm2,%xmm6
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3

movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0

/* Reduction phase 1: multiply low half by x^57+x^62+x^63. */
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1

/* Reduction phase 2: shift-and-xor folds into the low 128 bits. */
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
/* Store H, H^2 and their xor-folded halves (for Karatsuba). */
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,0(%rdi)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%rdi)
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%rdi)
/* Compute H^3 = H^2 * H (same multiply + reduce pattern). */
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3

movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0

movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1

movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
/* Save H^3 in %xmm5, then compute H^4 = H^3 * H. */
movdqa %xmm0,%xmm5
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,222,0
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3

movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0

movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1

movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
/* Store H^3, H^4 and their folded halves. */
pshufd $78,%xmm5,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm5,%xmm3
movdqu %xmm5,48(%rdi)
pxor %xmm0,%xmm4
movdqu %xmm0,64(%rdi)
.byte 102,15,58,15,227,8
movdqu %xmm4,80(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_clmul,.-gcm_init_clmul
/*
 * void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16])
 *
 * Single-block GHASH multiply using PCLMULQDQ: Xi = Xi * H, where H
 * and its pre-folded half are read from the Htable produced by
 * gcm_init_clmul (%rsi, offsets 0 and 32).  Input/output Xi at %rdi
 * is byte-swapped via .Lbswap_mask on load and store.
 *
 * .byte 102,15,56,0,197 is pshufb %xmm5,%xmm0; the 102,15,58,68,...
 * runs are pclmulqdq.  SysV AMD64 leaf function, SSE registers only.
 */
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
movdqu (%rdi),%xmm0
movdqa .Lbswap_mask(%rip),%xmm5
movdqu (%rsi),%xmm2
movdqu 32(%rsi),%xmm4
.byte 102,15,56,0,197
/* Karatsuba carry-less multiply: lo, hi, and middle products. */
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3

movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0

/* Montgomery-style reduction modulo the GHASH polynomial. */
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1

movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
/* Swap back to big-endian and store the result. */
.byte 102,15,56,0,197
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_clmul,.-gcm_gmult_clmul
/*
 * void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16],
 *                      const uint8_t *inp, size_t len)
 *
 * Bulk GHASH using PCLMULQDQ: Xi = (...((Xi^B0)*H ^ B1)*H ...) over
 * the input blocks, with 4-blocks-at-a-time aggregation (powers
 * H..H^4 from gcm_init_clmul) when len >= 0x40 and the CPU is not in
 * the denylisted feature combination checked via OPENSSL_ia32cap_P.
 *
 * SysV AMD64: %rdi = Xi, %rsi = Htable, %rdx = inp, %rcx = len
 * (multiple of 16).  Leaf function; SSE registers xmm0-xmm15 only.
 * .byte runs encode pshufb / pclmulqdq / movd as raw opcodes.
 */
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
movdqa .Lbswap_mask(%rip),%xmm10

movdqu (%rdi),%xmm0
movdqu (%rsi),%xmm2
movdqu 32(%rsi),%xmm7
.byte 102,65,15,56,0,194

/* Single trailing block? Skip straight to the scalar tail. */
subq $0x10,%rcx
jz .Lodd_tail

movdqu 16(%rsi),%xmm6
/* Feature check: avoid the 4x path on CPUs where it is slower
   (MOVBE-less Atom class, per the masked ia32cap bits). */
leaq OPENSSL_ia32cap_P(%rip),%rax
movl 4(%rax),%eax
cmpq $0x30,%rcx
jb .Lskip4x

andl $71303168,%eax
cmpl $4194304,%eax
je .Lskip4x

/* 4x aggregated path: load H^3, H^4 and the first four input
   blocks; products against H..H^4 are accumulated and reduced once. */
subq $0x30,%rcx
movq $0xA040608020C0E000,%rax
movdqu 48(%rsi),%xmm14
movdqu 64(%rsi),%xmm15

movdqu 48(%rdx),%xmm3
movdqu 32(%rdx),%xmm11
.byte 102,65,15,56,0,218
.byte 102,69,15,56,0,218
movdqa %xmm3,%xmm5
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

movdqa %xmm11,%xmm13
pshufd $78,%xmm11,%xmm12
pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
.byte 102,68,15,58,68,231,16
xorps %xmm11,%xmm3
xorps %xmm13,%xmm5
movups 80(%rsi),%xmm7
xorps %xmm12,%xmm4

movdqu 16(%rdx),%xmm11
movdqu 0(%rdx),%xmm8
.byte 102,69,15,56,0,218
.byte 102,69,15,56,0,194
movdqa %xmm11,%xmm13
pshufd $78,%xmm11,%xmm12
pxor %xmm8,%xmm0
pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
.byte 102,68,15,58,68,231,0
xorps %xmm11,%xmm3
xorps %xmm13,%xmm5

leaq 64(%rdx),%rdx
subq $0x40,%rcx
jc .Ltail4x

jmp .Lmod4_loop
/* Main 4x loop: multiplies of the running Xi by H^4 are interleaved
   with loading/byte-swapping the next 4 blocks and with the two-phase
   reduction (constant from .L7_mask), hiding pclmul latency. */
.align 32
.Lmod4_loop:
.byte 102,65,15,58,68,199,0
xorps %xmm12,%xmm4
movdqu 48(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,65,15,58,68,207,17
xorps %xmm3,%xmm0
movdqu 32(%rdx),%xmm3
movdqa %xmm11,%xmm13
.byte 102,68,15,58,68,199,16
pshufd $78,%xmm11,%xmm12
xorps %xmm5,%xmm1
pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
movups 32(%rsi),%xmm7
xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
pshufd $78,%xmm3,%xmm4

pxor %xmm0,%xmm8
movdqa %xmm3,%xmm5
pxor %xmm1,%xmm8
pxor %xmm3,%xmm4
movdqa %xmm8,%xmm9
.byte 102,68,15,58,68,234,17
pslldq $8,%xmm8
psrldq $8,%xmm9
pxor %xmm8,%xmm0
movdqa .L7_mask(%rip),%xmm8
pxor %xmm9,%xmm1
.byte 102,76,15,110,200

pand %xmm0,%xmm8
.byte 102,69,15,56,0,200
pxor %xmm0,%xmm9
.byte 102,68,15,58,68,231,0
psllq $57,%xmm9
movdqa %xmm9,%xmm8
pslldq $8,%xmm9
.byte 102,15,58,68,222,0
psrldq $8,%xmm8
pxor %xmm9,%xmm0
pxor %xmm8,%xmm1
movdqu 0(%rdx),%xmm8

movdqa %xmm0,%xmm9
psrlq $1,%xmm0
.byte 102,15,58,68,238,17
xorps %xmm11,%xmm3
movdqu 16(%rdx),%xmm11
.byte 102,69,15,56,0,218
.byte 102,15,58,68,231,16
xorps %xmm13,%xmm5
movups 80(%rsi),%xmm7
.byte 102,69,15,56,0,194
pxor %xmm9,%xmm1
pxor %xmm0,%xmm9
psrlq $5,%xmm0

movdqa %xmm11,%xmm13
pxor %xmm12,%xmm4
pshufd $78,%xmm11,%xmm12
pxor %xmm9,%xmm0
pxor %xmm8,%xmm1
pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
xorps %xmm11,%xmm3
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8

.byte 102,68,15,58,68,231,0
xorps %xmm13,%xmm5

leaq 64(%rdx),%rdx
subq $0x40,%rcx
jnc .Lmod4_loop

/* Drain the last 4-block group: combine partial products, then do
   the shared two-phase polynomial reduction. */
.Ltail4x:
.byte 102,65,15,58,68,199,0
.byte 102,65,15,58,68,207,17
.byte 102,68,15,58,68,199,16
xorps %xmm12,%xmm4
xorps %xmm3,%xmm0
xorps %xmm5,%xmm1
pxor %xmm0,%xmm1
pxor %xmm4,%xmm8

pxor %xmm1,%xmm8
pxor %xmm0,%xmm1

movdqa %xmm8,%xmm9
psrldq $8,%xmm8
pslldq $8,%xmm9
pxor %xmm8,%xmm1
pxor %xmm9,%xmm0

movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1

movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
addq $0x40,%rcx
jz .Ldone
movdqu 32(%rsi),%xmm7
subq $0x10,%rcx
jz .Lodd_tail
.Lskip4x:

/* 2x path: process pairs of blocks with H and H^2. */
movdqu (%rdx),%xmm8
movdqu 16(%rdx),%xmm3
.byte 102,69,15,56,0,194
.byte 102,65,15,56,0,218
pxor %xmm8,%xmm0

movdqa %xmm3,%xmm5
pshufd $78,%xmm3,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,68,218,0
.byte 102,15,58,68,234,17
.byte 102,15,58,68,231,0

leaq 32(%rdx),%rdx
nop
subq $0x20,%rcx
jbe .Leven_tail
nop
jmp .Lmod_loop

/* 2x main loop: Xi*H^2 combined with next-block*H, one reduction
   per pair, with loads and pclmuls interleaved for latency. */
.align 32
.Lmod_loop:
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

pxor %xmm3,%xmm0
pxor %xmm5,%xmm1
movdqu (%rdx),%xmm9
pxor %xmm0,%xmm8
.byte 102,69,15,56,0,202
movdqu 16(%rdx),%xmm3

pxor %xmm1,%xmm8
pxor %xmm9,%xmm1
pxor %xmm8,%xmm4
.byte 102,65,15,56,0,218
movdqa %xmm4,%xmm8
psrldq $8,%xmm8
pslldq $8,%xmm4
pxor %xmm8,%xmm1
pxor %xmm4,%xmm0

movdqa %xmm3,%xmm5

movdqa %xmm0,%xmm9
movdqa %xmm0,%xmm8
psllq $5,%xmm0
pxor %xmm0,%xmm8
.byte 102,15,58,68,218,0
psllq $1,%xmm0
pxor %xmm8,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm8
pslldq $8,%xmm0
psrldq $8,%xmm8
pxor %xmm9,%xmm0
pshufd $78,%xmm5,%xmm4
pxor %xmm8,%xmm1
pxor %xmm5,%xmm4

movdqa %xmm0,%xmm9
psrlq $1,%xmm0
.byte 102,15,58,68,234,17
pxor %xmm9,%xmm1
pxor %xmm0,%xmm9
psrlq $5,%xmm0
pxor %xmm9,%xmm0
leaq 32(%rdx),%rdx
psrlq $1,%xmm0
.byte 102,15,58,68,231,0
pxor %xmm1,%xmm0

subq $0x20,%rcx
ja .Lmod_loop

/* Fold in the final pending pair and reduce. */
.Leven_tail:
movdqa %xmm0,%xmm1
movdqa %xmm4,%xmm8
pshufd $78,%xmm0,%xmm4
pxor %xmm0,%xmm4

.byte 102,15,58,68,198,0
.byte 102,15,58,68,206,17
.byte 102,15,58,68,231,16

pxor %xmm3,%xmm0
pxor %xmm5,%xmm1
pxor %xmm0,%xmm8
pxor %xmm1,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm8
psrldq $8,%xmm8
pslldq $8,%xmm4
pxor %xmm8,%xmm1
pxor %xmm4,%xmm0

movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1

movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testq %rcx,%rcx
jnz .Ldone

/* Last single block: (Xi ^ block) * H, then reduce. */
.Lodd_tail:
movdqu (%rdx),%xmm8
.byte 102,69,15,56,0,194
pxor %xmm8,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,223,0
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3

movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0

movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1

movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.Ldone:
/* Byte-swap back to big-endian and store the updated Xi. */
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_clmul,.-gcm_ghash_clmul
/*
 * gcm_init_avx(u128 Htable[] (%rdi), const uint64_t H[2] (%rsi))
 *
 * Precomputes the key-dependent table used by gcm_ghash_avx from the
 * 16-byte hash key at (%rsi).  The loop below runs 4 times (movq $4,%r10)
 * and each iteration stores two table entries (0(%rdi), 16(%rdi)) plus a
 * precomputed Karatsuba combination at -16(%rdi) on the next pass,
 * advancing %rdi by 48 bytes per iteration.
 * NOTE(review): entries are presumably successive powers of H for the
 * 8-block-parallel ghash loop — inferred from the repeated
 * multiply-by-%xmm2 structure; confirm against the generating perl script.
 */
.globl gcm_init_avx
.hidden gcm_init_avx
.type gcm_init_avx,@function
.align 32
gcm_init_avx:
.cfi_startproc
vzeroupper

vmovdqu (%rsi),%xmm2
vpshufd $78,%xmm2,%xmm2        /* swap the two 64-bit halves of H */

/* Convert H into the bit-reflected domain: H <<= 1 (128-bit shift via
 * per-lane shifts + cross-lane carry), conditionally xor the 0x1c2
 * polynomial if the top bit was set (vpcmpgtd builds the all-ones mask). */
vpshufd $255,%xmm2,%xmm4
vpsrlq $63,%xmm2,%xmm3
vpsllq $1,%xmm2,%xmm2
vpxor %xmm5,%xmm5,%xmm5
vpcmpgtd %xmm4,%xmm5,%xmm5
vpslldq $8,%xmm3,%xmm3
vpor %xmm3,%xmm2,%xmm2

/* reduce: fold the carried-out bit back in with the field polynomial */
vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
vpxor %xmm5,%xmm2,%xmm2

/* %xmm2 = processed H, %xmm6 = (H.hi ^ H.lo) for Karatsuba middle term */
vpunpckhqdq %xmm2,%xmm2,%xmm6
vmovdqa %xmm2,%xmm0
vpxor %xmm2,%xmm6,%xmm6
movq $4,%r10                   /* 4 loop iterations -> 8 table powers */
jmp .Linit_start_avx
.align 32
.Linit_loop_avx:
/* Store the Karatsuba combination of the previous pair of powers. */
vpalignr $8,%xmm3,%xmm4,%xmm5
vmovdqu %xmm5,-16(%rdi)
/* One GHASH multiply: %xmm0 = %xmm0 * H (Karatsuba: hi, lo, middle). */
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3

/* Fold the 128-bit middle term into the 256-bit product (xmm1:xmm0). */
vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
/* First reduction phase: multiply low half by x^63 + x^62 + x^57. */
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1

/* Second reduction phase: shifts by 1 and 5, then combine halves. */
vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
.Linit_start_avx:
/* Save the current power, then square/multiply once more so each loop
 * iteration produces a pair of consecutive powers. */
vmovdqa %xmm0,%xmm5
vpunpckhqdq %xmm0,%xmm0,%xmm3
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
vpxor %xmm0,%xmm1,%xmm4
vpxor %xmm4,%xmm3,%xmm3

vpslldq $8,%xmm3,%xmm4
vpsrldq $8,%xmm3,%xmm3
vpxor %xmm4,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
vpsllq $57,%xmm0,%xmm3
vpsllq $62,%xmm0,%xmm4
vpxor %xmm3,%xmm4,%xmm4
vpsllq $63,%xmm0,%xmm3
vpxor %xmm3,%xmm4,%xmm4
vpslldq $8,%xmm4,%xmm3
vpsrldq $8,%xmm4,%xmm4
vpxor %xmm3,%xmm0,%xmm0
vpxor %xmm4,%xmm1,%xmm1

vpsrlq $1,%xmm0,%xmm4
vpxor %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $5,%xmm4,%xmm4
vpxor %xmm4,%xmm0,%xmm0
vpsrlq $1,%xmm0,%xmm0
vpxor %xmm1,%xmm0,%xmm0
/* Emit the pair of powers and their hi^lo halves for the next -16 store. */
vpshufd $78,%xmm5,%xmm3
vpshufd $78,%xmm0,%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqu %xmm5,0(%rdi)
vpxor %xmm0,%xmm4,%xmm4
vmovdqu %xmm0,16(%rdi)
leaq 48(%rdi),%rdi             /* 3 x 16-byte slots per iteration */
subq $1,%r10
jnz .Linit_loop_avx

/* Final Karatsuba combination for the last pair. */
vpalignr $8,%xmm4,%xmm3,%xmm5
vmovdqu %xmm5,-16(%rdi)

vzeroupper                     /* avoid AVX->SSE transition penalties */
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_init_avx,.-gcm_init_avx
/*
 * gcm_gmult_avx(uint64_t Xi[2] (%rdi), const u128 Htable[] (%rsi))
 *
 * The single-block multiply has no AVX-specific fast path: this entry
 * point simply tail-jumps into the CLMUL implementation at
 * .L_gmult_clmul (defined earlier in this file, outside this excerpt).
 */
.globl gcm_gmult_avx
.hidden gcm_gmult_avx
.type gcm_gmult_avx,@function
.align 32
gcm_gmult_avx:
.cfi_startproc
jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
/*
 * gcm_ghash_avx(uint64_t Xi[2] (%rdi), const u128 Htable[] (%rsi),
 *               const uint8_t *inp (%rdx), size_t len (%rcx))
 *
 * Folds `len` bytes (a multiple of 16) at `inp` into the running GHASH
 * state Xi using AVX + PCLMULQDQ.  Inputs of >= 0x80 bytes are processed
 * eight 16-byte blocks at a time (.Loop8x_avx) against precomputed table
 * entries from gcm_init_avx; the remainder (and short inputs) go one
 * block at a time through .Lshort_avx / .Ltail_avx.
 *
 * Register roles in the main loop:
 *   %xmm10/%xmm11/%xmm12 - accumulated 256-bit product + reduction temp
 *   %xmm0..%xmm5         - per-block partial products (lo/hi/mid sums)
 *   %xmm6/%xmm7          - current table entries (power of H, Karatsuba)
 *   %xmm13               - byte-swap mask; %xmm14/%xmm15 - input blocks
 *   %r10                 - &.L0x1c2_polynomial (reduction constant)
 */
.globl gcm_ghash_avx
.hidden gcm_ghash_avx
.type gcm_ghash_avx,@function
.align 32
gcm_ghash_avx:
.cfi_startproc
vzeroupper

vmovdqu (%rdi),%xmm10          /* load current Xi */
leaq .L0x1c2_polynomial(%rip),%r10
leaq 64(%rsi),%rsi             /* skip to the table region used here */
vmovdqu .Lbswap_mask(%rip),%xmm13
vpshufb %xmm13,%xmm10,%xmm10   /* Xi to bit-reflected byte order */
cmpq $0x80,%rcx
jb .Lshort_avx                 /* < 8 blocks: one-at-a-time path */
subq $0x80,%rcx

/* Prologue of the 8-block pipeline: multiply blocks 7..0 (highest
 * address first) by ascending powers of H, accumulating lo products in
 * %xmm0/%xmm3, hi products in %xmm1/%xmm4, middle terms in %xmm2/%xmm5. */
vmovdqu 112(%rdx),%xmm14
vmovdqu 0-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vmovdqu 32-64(%rsi),%xmm7

vpunpckhqdq %xmm14,%xmm14,%xmm9
vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm14,%xmm9,%xmm9
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 80(%rdx),%xmm14
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8

vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 48-64(%rsi),%xmm6
vpxor %xmm14,%xmm9,%xmm9
vmovdqu 64(%rdx),%xmm15
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7

vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8

vmovdqu 48(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9

vmovdqu 32(%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8

vmovdqu 16(%rdx),%xmm14
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm4,%xmm1,%xmm1
vpshufb %xmm13,%xmm14,%xmm14
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpxor %xmm5,%xmm2,%xmm2
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9

vmovdqu (%rdx),%xmm15
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm1,%xmm4,%xmm4
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2

leaq 128(%rdx),%rdx
cmpq $0x80,%rcx
jb .Ltail_avx                  /* fewer than 8 more blocks: finish up */

vpxor %xmm10,%xmm15,%xmm15     /* fold Xi into the first (lowest) block */
subq $0x80,%rcx
jmp .Loop8x_avx

.align 32
.Loop8x_avx:
/* Steady state: multiply the next 8 blocks while simultaneously
 * reducing the previous iteration's 256-bit product (held in
 * %xmm10/%xmm11/%xmm12) — the two vpclmulqdq $0x10,(%r10) folds below
 * interleave the reduction with fresh block multiplies. */
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 112(%rdx),%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpxor %xmm15,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
vmovdqu 0-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
vmovdqu 32-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9

vmovdqu 96(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpxor %xmm3,%xmm10,%xmm10
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vxorps %xmm4,%xmm11,%xmm11
vmovdqu 16-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm5,%xmm12,%xmm12
vxorps %xmm15,%xmm8,%xmm8

vmovdqu 80(%rdx),%xmm14
vpxor %xmm10,%xmm12,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpxor %xmm11,%xmm12,%xmm12
vpslldq $8,%xmm12,%xmm9
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vpsrldq $8,%xmm12,%xmm12
vpxor %xmm9,%xmm10,%xmm10
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm14
vxorps %xmm12,%xmm11,%xmm11
vpxor %xmm1,%xmm4,%xmm4
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 80-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5

vmovdqu 64(%rdx),%xmm15
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vxorps %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2

vmovdqu 48(%rdx),%xmm14
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 96-64(%rsi),%xmm6
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 128-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5

vmovdqu 32(%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpxor %xmm3,%xmm0,%xmm0
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm5,%xmm2,%xmm2
vxorps %xmm12,%xmm10,%xmm10

vmovdqu 16(%rdx),%xmm14
vpalignr $8,%xmm10,%xmm10,%xmm12
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
vpshufb %xmm13,%xmm14,%xmm14
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
vmovdqu 144-64(%rsi),%xmm6
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
vxorps %xmm11,%xmm12,%xmm12
vpunpckhqdq %xmm14,%xmm14,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
vmovdqu 176-64(%rsi),%xmm7
vpxor %xmm14,%xmm9,%xmm9
vpxor %xmm2,%xmm5,%xmm5

vmovdqu (%rdx),%xmm15
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
vpshufb %xmm13,%xmm15,%xmm15
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
vmovdqu 160-64(%rsi),%xmm6
vpxor %xmm12,%xmm15,%xmm15    /* fold reduced accumulator into block 0 */
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
vpxor %xmm10,%xmm15,%xmm15

leaq 128(%rdx),%rdx
subq $0x80,%rcx
jnc .Loop8x_avx

addq $0x80,%rcx                /* restore remaining-byte count */
jmp .Ltail_no_xor_avx

.align 32
.Lshort_avx:
/* One-block-at-a-time path for the final 1..7 blocks (or short input).
 * Reads the input backwards from the end, pairing each block with the
 * appropriate power of H from the table. */
vmovdqu -16(%rdx,%rcx,1),%xmm14
leaq (%rdx,%rcx,1),%rdx
vmovdqu 0-64(%rsi),%xmm6
vmovdqu 32-64(%rsi),%xmm7
vpshufb %xmm13,%xmm14,%xmm15

vmovdqa %xmm0,%xmm3
vmovdqa %xmm1,%xmm4
vmovdqa %xmm2,%xmm5
subq $0x10,%rcx
jz .Ltail_avx

vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -32(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 16-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx

vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -48(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 48-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 80-64(%rsi),%xmm7
subq $0x10,%rcx
jz .Ltail_avx

vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -64(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 64-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx

vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -80(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 96-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovdqu 128-64(%rsi),%xmm7
subq $0x10,%rcx
jz .Ltail_avx

vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -96(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 112-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vpsrldq $8,%xmm7,%xmm7
subq $0x10,%rcx
jz .Ltail_avx

vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vmovdqu -112(%rdx),%xmm14
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vmovdqu 144-64(%rsi),%xmm6
vpshufb %xmm13,%xmm14,%xmm15
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
vmovq 184-64(%rsi),%xmm7       /* only 64 bits of the last entry needed */
subq $0x10,%rcx
jmp .Ltail_avx

.align 32
.Ltail_avx:
vpxor %xmm10,%xmm15,%xmm15     /* fold Xi into the final block */
.Ltail_no_xor_avx:
/* Last multiply, then combine all partial products. */
vpunpckhqdq %xmm15,%xmm15,%xmm8
vpxor %xmm0,%xmm3,%xmm3
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
vpxor %xmm15,%xmm8,%xmm8
vpxor %xmm1,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
vpxor %xmm2,%xmm5,%xmm5
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2

vmovdqu (%r10),%xmm12          /* 0x1c2 reduction polynomial */

vpxor %xmm0,%xmm3,%xmm10
vpxor %xmm1,%xmm4,%xmm11
vpxor %xmm2,%xmm5,%xmm5

/* Karatsuba fix-up: distribute the middle term across the two halves. */
vpxor %xmm10,%xmm5,%xmm5
vpxor %xmm11,%xmm5,%xmm5
vpslldq $8,%xmm5,%xmm9
vpsrldq $8,%xmm5,%xmm5
vpxor %xmm9,%xmm10,%xmm10
vpxor %xmm5,%xmm11,%xmm11

/* Two-step Montgomery-style reduction of the 256-bit product. */
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10

vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
vpalignr $8,%xmm10,%xmm10,%xmm10
vpxor %xmm11,%xmm10,%xmm10
vpxor %xmm9,%xmm10,%xmm10

cmpq $0,%rcx
jne .Lshort_avx                /* tail loop may re-enter for leftovers */

vpshufb %xmm13,%xmm10,%xmm10   /* back to memory byte order */
vmovdqu %xmm10,(%rdi)          /* store updated Xi */
vzeroupper
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
.align 64
/* vpshufb mask that reverses the byte order of a 128-bit lane
 * (used above to convert Xi/input blocks to bit-reflected order). */
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
/* GHASH field-reduction constant: 0xc2 in the top byte, 1 in the lowest
 * (the 0x1c2 polynomial), consumed via vpclmulqdq $0x10,(%r10). */
.L0x1c2_polynomial:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
/* Masks for a table-driven path defined elsewhere in this file
 * (NOTE(review): not referenced within this excerpt). */
.L7_mask:
.long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
.align 64
/* Remainder lookup table for the 4-bit implementation
 * (indexed via .Lrem_4bit(%rip) in gcm_gmult_4bit). */
.type .Lrem_4bit,@object
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
/* 256-entry remainder table for the 8-bit implementation
 * (NOTE(review): consumer is outside this excerpt). */
.type .Lrem_8bit,@object
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

/* ASCII banner: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
1871 | #endif |
1872 | |