1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
11#if defined(BORINGSSL_PREFIX)
12#include <boringssl_prefix_symbols_asm.h>
13#endif
14.text
15.extern OPENSSL_ia32cap_P
16.hidden OPENSSL_ia32cap_P
# aes_hw_encrypt: encrypt a single 16-byte block using AES-NI.
# Registers read on entry (System V AMD64 argument order):
#   %rdi = address of the input block (loaded with movups, so no alignment
#          is required)
#   %rsi = address that receives the 16-byte result
#   %rdx = address of the key schedule: 16-byte round keys start at offset 0
#          and a 32-bit loop count is read from offset 240
# Presumably declared in C as
#   void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
# TODO confirm against the header.
# Modifies %eax, %rdx, flags and %xmm0-%xmm2; the three xmm registers are
# zeroed before returning.
17.globl aes_hw_encrypt
18.hidden aes_hw_encrypt
19.type aes_hw_encrypt,@function
20.align 16
21aes_hw_encrypt:
22.cfi_startproc
23#ifndef NDEBUG
24#ifndef BORINGSSL_FIPS
25.extern BORINGSSL_function_hit
26.hidden BORINGSSL_function_hit
# Only when neither NDEBUG nor BORINGSSL_FIPS is defined: write 1 to byte 1
# of BORINGSSL_function_hit (presumably a marker letting tests verify that
# this routine ran -- confirm with the code that reads it).
27 movb $1,BORINGSSL_function_hit+1(%rip)
28#endif
29#endif
# xmm2 = input block, eax = loop count from key+240,
# xmm0 = round key 0, xmm1 = round key 1.
30 movups (%rdi),%xmm2
31 movl 240(%rdx),%eax
32 movups (%rdx),%xmm0
33 movups 16(%rdx),%xmm1
# Step rdx to round key 2, then whiten the block with round key 0.
34 leaq 32(%rdx),%rdx
35 xorps %xmm0,%xmm2
36.Loop_enc1_1:
# The bytes below encode: aesenc %xmm1,%xmm2 (one AES round).
37.byte 102,15,56,220,209
# decl sets ZF for the jnz; movups and leaq leave the flags alone while
# loading the next round key and advancing rdx by one key.
38 decl %eax
39 movups (%rdx),%xmm1
40 leaq 16(%rdx),%rdx
41 jnz .Loop_enc1_1
# The bytes below encode: aesenclast %xmm1,%xmm2 (final AES round), using
# the key fetched by the last loop pass.
42.byte 102,15,56,221,209
# Zero the key registers, store the result, then zero the data register
# (presumably so key and block material do not stay in registers).
43 pxor %xmm0,%xmm0
44 pxor %xmm1,%xmm1
45 movups %xmm2,(%rsi)
46 pxor %xmm2,%xmm2
# The bytes below encode: rep ret.
47 .byte 0xf3,0xc3
48.cfi_endproc
49.size aes_hw_encrypt,.-aes_hw_encrypt
50
# aes_hw_decrypt: decrypt a single 16-byte block using AES-NI.
# Registers read on entry (System V AMD64 argument order):
#   %rdi = address of the input block (loaded with movups, so no alignment
#          is required)
#   %rsi = address that receives the 16-byte result
#   %rdx = address of the key schedule: 16-byte round keys start at offset 0
#          and a 32-bit loop count is read from offset 240
# Round keys are consumed in ascending address order, so the schedule must
# already be laid out the way aesdec needs it (presumably produced by a
# separate decrypt-key setup routine -- TODO confirm).
# Presumably declared in C as
#   void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
# TODO confirm against the header.
# Modifies %eax, %rdx, flags and %xmm0-%xmm2; the three xmm registers are
# zeroed before returning.
51.globl aes_hw_decrypt
52.hidden aes_hw_decrypt
53.type aes_hw_decrypt,@function
54.align 16
55aes_hw_decrypt:
56.cfi_startproc
# xmm2 = input block, eax = loop count from key+240,
# xmm0 = round key 0, xmm1 = round key 1.
57 movups (%rdi),%xmm2
58 movl 240(%rdx),%eax
59 movups (%rdx),%xmm0
60 movups 16(%rdx),%xmm1
# Step rdx to round key 2, then whiten the block with round key 0.
61 leaq 32(%rdx),%rdx
62 xorps %xmm0,%xmm2
63.Loop_dec1_2:
# The bytes below encode: aesdec %xmm1,%xmm2 (one AES decryption round).
64.byte 102,15,56,222,209
# decl sets ZF for the jnz; movups and leaq leave the flags alone while
# loading the next round key and advancing rdx by one key.
65 decl %eax
66 movups (%rdx),%xmm1
67 leaq 16(%rdx),%rdx
68 jnz .Loop_dec1_2
# The bytes below encode: aesdeclast %xmm1,%xmm2 (final decryption round),
# using the key fetched by the last loop pass.
69.byte 102,15,56,223,209
# Zero the key registers, store the result, then zero the data register
# (presumably so key and plaintext material do not stay in registers).
70 pxor %xmm0,%xmm0
71 pxor %xmm1,%xmm1
72 movups %xmm2,(%rsi)
73 pxor %xmm2,%xmm2
# The bytes below encode: rep ret.
74 .byte 0xf3,0xc3
75.cfi_endproc
76.size aes_hw_decrypt, .-aes_hw_decrypt
# _aesni_encrypt2: file-local helper that runs two blocks through the AES
# encryption rounds in parallel. It uses a private register contract, not
# the C calling convention:
#   in:  %rcx = key schedule address
#        %eax = loop count (the caller in this file loads it from offset 240
#               of the key schedule)
#        %xmm2, %xmm3 = input blocks
#   out: %xmm2, %xmm3 = results, in place
#   modifies: %rax, %rcx, %xmm0, %xmm1, flags
# The loop consumes two round keys per pass and exits when rax reaches
# exactly zero, so it falls out as intended only for an odd count of 3 or
# more (derived from the index arithmetic below).
77.type _aesni_encrypt2,@function
78.align 16
79_aesni_encrypt2:
80.cfi_startproc
# xmm0 = round key 0; eax = count * 16 (bytes per round key).
81 movups (%rcx),%xmm0
82 shll $4,%eax
# xmm1 = round key 1; whiten both blocks with round key 0.
83 movups 16(%rcx),%xmm1
84 xorps %xmm0,%xmm2
85 xorps %xmm0,%xmm3
# xmm0 = round key 2. rcx = key + 32 + 16*count (just past the last round
# key used); rax = 16 - 16*count, a negative offset that climbs to zero.
86 movups 32(%rcx),%xmm0
87 leaq 32(%rcx,%rax,1),%rcx
88 negq %rax
89 addq $16,%rax
90
91.Lenc_loop2:
# aesenc %xmm1,%xmm2 ; aesenc %xmm1,%xmm3
92.byte 102,15,56,220,209
93.byte 102,15,56,220,217
# Load the next round key into xmm1; addq sets ZF for the jnz below.
94 movups (%rcx,%rax,1),%xmm1
95 addq $32,%rax
# aesenc %xmm0,%xmm2 ; aesenc %xmm0,%xmm3
96.byte 102,15,56,220,208
97.byte 102,15,56,220,216
# Load the following round key into xmm0 (movups leaves the flags alone).
98 movups -16(%rcx,%rax,1),%xmm0
99 jnz .Lenc_loop2
100
# Tail: one more aesenc with xmm1, then aesenclast with xmm0, per block.
101.byte 102,15,56,220,209
102.byte 102,15,56,220,217
103.byte 102,15,56,221,208
104.byte 102,15,56,221,216
# The bytes below encode: rep ret.
105 .byte 0xf3,0xc3
106.cfi_endproc
107.size _aesni_encrypt2,.-_aesni_encrypt2
# _aesni_decrypt2: file-local helper that runs two blocks through the AES
# decryption rounds in parallel. It uses a private register contract, not
# the C calling convention:
#   in:  %rcx = key schedule address
#        %eax = loop count (the caller in this file loads it from offset 240
#               of the key schedule)
#        %xmm2, %xmm3 = input blocks
#   out: %xmm2, %xmm3 = results, in place
#   modifies: %rax, %rcx, %xmm0, %xmm1, flags
# The loop consumes two round keys per pass and exits when rax reaches
# exactly zero, so it falls out as intended only for an odd count of 3 or
# more (derived from the index arithmetic below).
108.type _aesni_decrypt2,@function
109.align 16
110_aesni_decrypt2:
111.cfi_startproc
# xmm0 = round key 0; eax = count * 16 (bytes per round key).
112 movups (%rcx),%xmm0
113 shll $4,%eax
# xmm1 = round key 1; whiten both blocks with round key 0.
114 movups 16(%rcx),%xmm1
115 xorps %xmm0,%xmm2
116 xorps %xmm0,%xmm3
# xmm0 = round key 2. rcx = key + 32 + 16*count (just past the last round
# key used); rax = 16 - 16*count, a negative offset that climbs to zero.
117 movups 32(%rcx),%xmm0
118 leaq 32(%rcx,%rax,1),%rcx
119 negq %rax
120 addq $16,%rax
121
122.Ldec_loop2:
# aesdec %xmm1,%xmm2 ; aesdec %xmm1,%xmm3
123.byte 102,15,56,222,209
124.byte 102,15,56,222,217
# Load the next round key into xmm1; addq sets ZF for the jnz below.
125 movups (%rcx,%rax,1),%xmm1
126 addq $32,%rax
# aesdec %xmm0,%xmm2 ; aesdec %xmm0,%xmm3
127.byte 102,15,56,222,208
128.byte 102,15,56,222,216
# Load the following round key into xmm0 (movups leaves the flags alone).
129 movups -16(%rcx,%rax,1),%xmm0
130 jnz .Ldec_loop2
131
# Tail: one more aesdec with xmm1, then aesdeclast with xmm0, per block.
132.byte 102,15,56,222,209
133.byte 102,15,56,222,217
134.byte 102,15,56,223,208
135.byte 102,15,56,223,216
# The bytes below encode: rep ret.
136 .byte 0xf3,0xc3
137.cfi_endproc
138.size _aesni_decrypt2,.-_aesni_decrypt2
# _aesni_encrypt3: file-local helper that runs three blocks through the AES
# encryption rounds in parallel. It uses a private register contract, not
# the C calling convention:
#   in:  %rcx = key schedule address
#        %eax = loop count (the caller in this file loads it from offset 240
#               of the key schedule)
#        %xmm2-%xmm4 = input blocks
#   out: %xmm2-%xmm4 = results, in place
#   modifies: %rax, %rcx, %xmm0, %xmm1, flags
# The loop consumes two round keys per pass and exits when rax reaches
# exactly zero, so it falls out as intended only for an odd count of 3 or
# more (derived from the index arithmetic below).
139.type _aesni_encrypt3,@function
140.align 16
141_aesni_encrypt3:
142.cfi_startproc
# xmm0 = round key 0; eax = count * 16 (bytes per round key).
143 movups (%rcx),%xmm0
144 shll $4,%eax
# xmm1 = round key 1; whiten the three blocks with round key 0.
145 movups 16(%rcx),%xmm1
146 xorps %xmm0,%xmm2
147 xorps %xmm0,%xmm3
148 xorps %xmm0,%xmm4
# xmm0 = round key 2. rcx = key + 32 + 16*count (just past the last round
# key used); rax = 16 - 16*count, a negative offset that climbs to zero.
149 movups 32(%rcx),%xmm0
150 leaq 32(%rcx,%rax,1),%rcx
151 negq %rax
152 addq $16,%rax
153
154.Lenc_loop3:
# aesenc with xmm1 on xmm2, xmm3, xmm4.
155.byte 102,15,56,220,209
156.byte 102,15,56,220,217
157.byte 102,15,56,220,225
# Load the next round key into xmm1; addq sets ZF for the jnz below.
158 movups (%rcx,%rax,1),%xmm1
159 addq $32,%rax
# aesenc with xmm0 on xmm2, xmm3, xmm4.
160.byte 102,15,56,220,208
161.byte 102,15,56,220,216
162.byte 102,15,56,220,224
# Load the following round key into xmm0 (movups leaves the flags alone).
163 movups -16(%rcx,%rax,1),%xmm0
164 jnz .Lenc_loop3
165
# Tail: one more aesenc with xmm1, then aesenclast with xmm0, per block.
166.byte 102,15,56,220,209
167.byte 102,15,56,220,217
168.byte 102,15,56,220,225
169.byte 102,15,56,221,208
170.byte 102,15,56,221,216
171.byte 102,15,56,221,224
# The bytes below encode: rep ret.
172 .byte 0xf3,0xc3
173.cfi_endproc
174.size _aesni_encrypt3,.-_aesni_encrypt3
# _aesni_decrypt3: file-local helper that runs three blocks through the AES
# decryption rounds in parallel. It uses a private register contract, not
# the C calling convention:
#   in:  %rcx = key schedule address
#        %eax = loop count (the caller in this file loads it from offset 240
#               of the key schedule)
#        %xmm2-%xmm4 = input blocks
#   out: %xmm2-%xmm4 = results, in place
#   modifies: %rax, %rcx, %xmm0, %xmm1, flags
# The loop consumes two round keys per pass and exits when rax reaches
# exactly zero, so it falls out as intended only for an odd count of 3 or
# more (derived from the index arithmetic below).
175.type _aesni_decrypt3,@function
176.align 16
177_aesni_decrypt3:
178.cfi_startproc
# xmm0 = round key 0; eax = count * 16 (bytes per round key).
179 movups (%rcx),%xmm0
180 shll $4,%eax
# xmm1 = round key 1; whiten the three blocks with round key 0.
181 movups 16(%rcx),%xmm1
182 xorps %xmm0,%xmm2
183 xorps %xmm0,%xmm3
184 xorps %xmm0,%xmm4
# xmm0 = round key 2. rcx = key + 32 + 16*count (just past the last round
# key used); rax = 16 - 16*count, a negative offset that climbs to zero.
185 movups 32(%rcx),%xmm0
186 leaq 32(%rcx,%rax,1),%rcx
187 negq %rax
188 addq $16,%rax
189
190.Ldec_loop3:
# aesdec with xmm1 on xmm2, xmm3, xmm4.
191.byte 102,15,56,222,209
192.byte 102,15,56,222,217
193.byte 102,15,56,222,225
# Load the next round key into xmm1; addq sets ZF for the jnz below.
194 movups (%rcx,%rax,1),%xmm1
195 addq $32,%rax
# aesdec with xmm0 on xmm2, xmm3, xmm4.
196.byte 102,15,56,222,208
197.byte 102,15,56,222,216
198.byte 102,15,56,222,224
# Load the following round key into xmm0 (movups leaves the flags alone).
199 movups -16(%rcx,%rax,1),%xmm0
200 jnz .Ldec_loop3
201
# Tail: one more aesdec with xmm1, then aesdeclast with xmm0, per block.
202.byte 102,15,56,222,209
203.byte 102,15,56,222,217
204.byte 102,15,56,222,225
205.byte 102,15,56,223,208
206.byte 102,15,56,223,216
207.byte 102,15,56,223,224
# The bytes below encode: rep ret.
208 .byte 0xf3,0xc3
209.cfi_endproc
210.size _aesni_decrypt3,.-_aesni_decrypt3
# _aesni_encrypt4: file-local helper that runs four blocks through the AES
# encryption rounds in parallel. It uses a private register contract, not
# the C calling convention:
#   in:  %rcx = key schedule address
#        %eax = loop count (the caller in this file loads it from offset 240
#               of the key schedule)
#        %xmm2-%xmm5 = input blocks
#   out: %xmm2-%xmm5 = results, in place
#   modifies: %rax, %rcx, %xmm0, %xmm1, flags
# The loop consumes two round keys per pass and exits when rax reaches
# exactly zero, so it falls out as intended only for an odd count of 3 or
# more (derived from the index arithmetic below).
211.type _aesni_encrypt4,@function
212.align 16
213_aesni_encrypt4:
214.cfi_startproc
# xmm0 = round key 0; eax = count * 16 (bytes per round key).
215 movups (%rcx),%xmm0
216 shll $4,%eax
# xmm1 = round key 1; whiten the four blocks with round key 0.
217 movups 16(%rcx),%xmm1
218 xorps %xmm0,%xmm2
219 xorps %xmm0,%xmm3
220 xorps %xmm0,%xmm4
221 xorps %xmm0,%xmm5
# xmm0 = round key 2. rcx = key + 32 + 16*count (just past the last round
# key used); rax = 16 - 16*count, a negative offset that climbs to zero.
222 movups 32(%rcx),%xmm0
223 leaq 32(%rcx,%rax,1),%rcx
224 negq %rax
# The three bytes below encode a 3-byte NOP, nopl (%rax); presumably padding
# that positions the loop head -- confirm with the generating script.
225.byte 0x0f,0x1f,0x00
226 addq $16,%rax
227
228.Lenc_loop4:
# aesenc with xmm1 on xmm2..xmm5.
229.byte 102,15,56,220,209
230.byte 102,15,56,220,217
231.byte 102,15,56,220,225
232.byte 102,15,56,220,233
# Load the next round key into xmm1; addq sets ZF for the jnz below.
233 movups (%rcx,%rax,1),%xmm1
234 addq $32,%rax
# aesenc with xmm0 on xmm2..xmm5.
235.byte 102,15,56,220,208
236.byte 102,15,56,220,216
237.byte 102,15,56,220,224
238.byte 102,15,56,220,232
# Load the following round key into xmm0 (movups leaves the flags alone).
239 movups -16(%rcx,%rax,1),%xmm0
240 jnz .Lenc_loop4
241
# Tail: one more aesenc with xmm1, then aesenclast with xmm0, per block.
242.byte 102,15,56,220,209
243.byte 102,15,56,220,217
244.byte 102,15,56,220,225
245.byte 102,15,56,220,233
246.byte 102,15,56,221,208
247.byte 102,15,56,221,216
248.byte 102,15,56,221,224
249.byte 102,15,56,221,232
# The bytes below encode: rep ret.
250 .byte 0xf3,0xc3
251.cfi_endproc
252.size _aesni_encrypt4,.-_aesni_encrypt4
# _aesni_decrypt4: file-local helper that runs four blocks through the AES
# decryption rounds in parallel. It uses a private register contract, not
# the C calling convention:
#   in:  %rcx = key schedule address
#        %eax = loop count (the caller in this file loads it from offset 240
#               of the key schedule)
#        %xmm2-%xmm5 = input blocks
#   out: %xmm2-%xmm5 = results, in place
#   modifies: %rax, %rcx, %xmm0, %xmm1, flags
# The loop consumes two round keys per pass and exits when rax reaches
# exactly zero, so it falls out as intended only for an odd count of 3 or
# more (derived from the index arithmetic below).
253.type _aesni_decrypt4,@function
254.align 16
255_aesni_decrypt4:
256.cfi_startproc
# xmm0 = round key 0; eax = count * 16 (bytes per round key).
257 movups (%rcx),%xmm0
258 shll $4,%eax
# xmm1 = round key 1; whiten the four blocks with round key 0.
259 movups 16(%rcx),%xmm1
260 xorps %xmm0,%xmm2
261 xorps %xmm0,%xmm3
262 xorps %xmm0,%xmm4
263 xorps %xmm0,%xmm5
# xmm0 = round key 2. rcx = key + 32 + 16*count (just past the last round
# key used); rax = 16 - 16*count, a negative offset that climbs to zero.
264 movups 32(%rcx),%xmm0
265 leaq 32(%rcx,%rax,1),%rcx
266 negq %rax
# The three bytes below encode a 3-byte NOP, nopl (%rax); presumably padding
# that positions the loop head -- confirm with the generating script.
267.byte 0x0f,0x1f,0x00
268 addq $16,%rax
269
270.Ldec_loop4:
# aesdec with xmm1 on xmm2..xmm5.
271.byte 102,15,56,222,209
272.byte 102,15,56,222,217
273.byte 102,15,56,222,225
274.byte 102,15,56,222,233
# Load the next round key into xmm1; addq sets ZF for the jnz below.
275 movups (%rcx,%rax,1),%xmm1
276 addq $32,%rax
# aesdec with xmm0 on xmm2..xmm5.
277.byte 102,15,56,222,208
278.byte 102,15,56,222,216
279.byte 102,15,56,222,224
280.byte 102,15,56,222,232
# Load the following round key into xmm0 (movups leaves the flags alone).
281 movups -16(%rcx,%rax,1),%xmm0
282 jnz .Ldec_loop4
283
# Tail: one more aesdec with xmm1, then aesdeclast with xmm0, per block.
284.byte 102,15,56,222,209
285.byte 102,15,56,222,217
286.byte 102,15,56,222,225
287.byte 102,15,56,222,233
288.byte 102,15,56,223,208
289.byte 102,15,56,223,216
290.byte 102,15,56,223,224
291.byte 102,15,56,223,232
# The bytes below encode: rep ret.
292 .byte 0xf3,0xc3
293.cfi_endproc
294.size _aesni_decrypt4,.-_aesni_decrypt4
# _aesni_encrypt6: file-local helper that runs six blocks through the AES
# encryption rounds in parallel. It uses a private register contract, not
# the C calling convention:
#   in:  %rcx = key schedule address
#        %eax = loop count (the caller in this file loads it from offset 240
#               of the key schedule)
#        %xmm2-%xmm7 = input blocks
#   out: %xmm2-%xmm7 = results, in place
#   modifies: %rax, %rcx, %xmm0, %xmm1, flags
# The setup interleaves whitening with the first round and then jumps into
# the middle of the loop. The loop consumes two round keys per pass and
# exits when rax reaches exactly zero, so it falls out as intended only for
# an odd count of 3 or more (derived from the index arithmetic below).
295.type _aesni_encrypt6,@function
296.align 16
297_aesni_encrypt6:
298.cfi_startproc
# xmm0 = round key 0; eax = count * 16; xmm1 = round key 1.
299 movups (%rcx),%xmm0
300 shll $4,%eax
301 movups 16(%rcx),%xmm1
# Whiten xmm2..xmm4 with round key 0.
302 xorps %xmm0,%xmm2
303 pxor %xmm0,%xmm3
304 pxor %xmm0,%xmm4
# aesenc %xmm1,%xmm2
305.byte 102,15,56,220,209
# rcx = key + 32 + 16*count (just past the last round key used);
# rax = -16*count.
306 leaq 32(%rcx,%rax,1),%rcx
307 negq %rax
# aesenc %xmm1,%xmm3
308.byte 102,15,56,220,217
# Whiten xmm5 and xmm6.
309 pxor %xmm0,%xmm5
310 pxor %xmm0,%xmm6
# aesenc %xmm1,%xmm4
311.byte 102,15,56,220,225
# Whiten xmm7, then xmm0 = round key 2 (rcx + rax is key + 32) and
# rax = 16 - 16*count.
312 pxor %xmm0,%xmm7
313 movups (%rcx,%rax,1),%xmm0
314 addq $16,%rax
# Enter the loop in the middle: xmm2..xmm4 already had their round with
# xmm1, xmm5..xmm7 still need it.
315 jmp .Lenc_loop6_enter
316.align 16
# NOTE: .Lenc_loop6 is also used as a call target by
# aes_hw_ctr32_encrypt_blocks later in this file, so the code from here to
# the return must keep its current register contract.
317.Lenc_loop6:
# aesenc with xmm1 on xmm2, xmm3, xmm4.
318.byte 102,15,56,220,209
319.byte 102,15,56,220,217
320.byte 102,15,56,220,225
321.Lenc_loop6_enter:
# aesenc with xmm1 on xmm5, xmm6, xmm7.
322.byte 102,15,56,220,233
323.byte 102,15,56,220,241
324.byte 102,15,56,220,249
# Load the next round key into xmm1; addq sets ZF for the jnz below.
325 movups (%rcx,%rax,1),%xmm1
326 addq $32,%rax
# aesenc with xmm0 on xmm2..xmm7.
327.byte 102,15,56,220,208
328.byte 102,15,56,220,216
329.byte 102,15,56,220,224
330.byte 102,15,56,220,232
331.byte 102,15,56,220,240
332.byte 102,15,56,220,248
# Load the following round key into xmm0 (movups leaves the flags alone).
333 movups -16(%rcx,%rax,1),%xmm0
334 jnz .Lenc_loop6
335
# Tail: one more aesenc with xmm1 on all six blocks ...
336.byte 102,15,56,220,209
337.byte 102,15,56,220,217
338.byte 102,15,56,220,225
339.byte 102,15,56,220,233
340.byte 102,15,56,220,241
341.byte 102,15,56,220,249
# ... then aesenclast with xmm0 on all six blocks.
342.byte 102,15,56,221,208
343.byte 102,15,56,221,216
344.byte 102,15,56,221,224
345.byte 102,15,56,221,232
346.byte 102,15,56,221,240
347.byte 102,15,56,221,248
# The bytes below encode: rep ret.
348 .byte 0xf3,0xc3
349.cfi_endproc
350.size _aesni_encrypt6,.-_aesni_encrypt6
# _aesni_decrypt6: file-local helper that runs six blocks through the AES
# decryption rounds in parallel. It uses a private register contract, not
# the C calling convention:
#   in:  %rcx = key schedule address
#        %eax = loop count (the caller in this file loads it from offset 240
#               of the key schedule)
#        %xmm2-%xmm7 = input blocks
#   out: %xmm2-%xmm7 = results, in place
#   modifies: %rax, %rcx, %xmm0, %xmm1, flags
# The setup interleaves whitening with the first round and then jumps into
# the middle of the loop. The loop consumes two round keys per pass and
# exits when rax reaches exactly zero, so it falls out as intended only for
# an odd count of 3 or more (derived from the index arithmetic below).
351.type _aesni_decrypt6,@function
352.align 16
353_aesni_decrypt6:
354.cfi_startproc
# xmm0 = round key 0; eax = count * 16; xmm1 = round key 1.
355 movups (%rcx),%xmm0
356 shll $4,%eax
357 movups 16(%rcx),%xmm1
# Whiten xmm2..xmm4 with round key 0.
358 xorps %xmm0,%xmm2
359 pxor %xmm0,%xmm3
360 pxor %xmm0,%xmm4
# aesdec %xmm1,%xmm2
361.byte 102,15,56,222,209
# rcx = key + 32 + 16*count (just past the last round key used);
# rax = -16*count.
362 leaq 32(%rcx,%rax,1),%rcx
363 negq %rax
# aesdec %xmm1,%xmm3
364.byte 102,15,56,222,217
# Whiten xmm5 and xmm6.
365 pxor %xmm0,%xmm5
366 pxor %xmm0,%xmm6
# aesdec %xmm1,%xmm4
367.byte 102,15,56,222,225
# Whiten xmm7, then xmm0 = round key 2 (rcx + rax is key + 32) and
# rax = 16 - 16*count.
368 pxor %xmm0,%xmm7
369 movups (%rcx,%rax,1),%xmm0
370 addq $16,%rax
# Enter the loop in the middle: xmm2..xmm4 already had their round with
# xmm1, xmm5..xmm7 still need it.
371 jmp .Ldec_loop6_enter
372.align 16
373.Ldec_loop6:
# aesdec with xmm1 on xmm2, xmm3, xmm4.
374.byte 102,15,56,222,209
375.byte 102,15,56,222,217
376.byte 102,15,56,222,225
377.Ldec_loop6_enter:
# aesdec with xmm1 on xmm5, xmm6, xmm7.
378.byte 102,15,56,222,233
379.byte 102,15,56,222,241
380.byte 102,15,56,222,249
# Load the next round key into xmm1; addq sets ZF for the jnz below.
381 movups (%rcx,%rax,1),%xmm1
382 addq $32,%rax
# aesdec with xmm0 on xmm2..xmm7.
383.byte 102,15,56,222,208
384.byte 102,15,56,222,216
385.byte 102,15,56,222,224
386.byte 102,15,56,222,232
387.byte 102,15,56,222,240
388.byte 102,15,56,222,248
# Load the following round key into xmm0 (movups leaves the flags alone).
389 movups -16(%rcx,%rax,1),%xmm0
390 jnz .Ldec_loop6
391
# Tail: one more aesdec with xmm1 on all six blocks ...
392.byte 102,15,56,222,209
393.byte 102,15,56,222,217
394.byte 102,15,56,222,225
395.byte 102,15,56,222,233
396.byte 102,15,56,222,241
397.byte 102,15,56,222,249
# ... then aesdeclast with xmm0 on all six blocks.
398.byte 102,15,56,223,208
399.byte 102,15,56,223,216
400.byte 102,15,56,223,224
401.byte 102,15,56,223,232
402.byte 102,15,56,223,240
403.byte 102,15,56,223,248
# The bytes below encode: rep ret.
404 .byte 0xf3,0xc3
405.cfi_endproc
406.size _aesni_decrypt6,.-_aesni_decrypt6
# _aesni_encrypt8: file-local helper that runs eight blocks through the AES
# encryption rounds in parallel. It uses a private register contract, not
# the C calling convention:
#   in:  %rcx = key schedule address
#        %eax = loop count (the caller in this file loads it from offset 240
#               of the key schedule)
#        %xmm2-%xmm9 = input blocks
#   out: %xmm2-%xmm9 = results, in place
#   modifies: %rax, %rcx, %xmm0, %xmm1, flags
# The setup interleaves whitening with the first round and then jumps into
# the middle of the loop. The loop consumes two round keys per pass and
# exits when rax reaches exactly zero, so it falls out as intended only for
# an odd count of 3 or more (derived from the index arithmetic below).
# Encodings with the extra byte 68 (a REX prefix) address xmm8 and xmm9.
407.type _aesni_encrypt8,@function
408.align 16
409_aesni_encrypt8:
410.cfi_startproc
# xmm0 = round key 0; eax = count * 16; xmm1 = round key 1.
411 movups (%rcx),%xmm0
412 shll $4,%eax
413 movups 16(%rcx),%xmm1
# Whiten xmm2..xmm6 with round key 0.
414 xorps %xmm0,%xmm2
415 xorps %xmm0,%xmm3
416 pxor %xmm0,%xmm4
417 pxor %xmm0,%xmm5
418 pxor %xmm0,%xmm6
# rcx = key + 32 + 16*count (just past the last round key used);
# rax = -16*count.
419 leaq 32(%rcx,%rax,1),%rcx
420 negq %rax
# aesenc %xmm1,%xmm2
421.byte 102,15,56,220,209
# Whiten xmm7 and xmm8.
422 pxor %xmm0,%xmm7
423 pxor %xmm0,%xmm8
# aesenc %xmm1,%xmm3
424.byte 102,15,56,220,217
# Whiten xmm9, then xmm0 = round key 2 (rcx + rax is key + 32) and
# rax = 16 - 16*count.
425 pxor %xmm0,%xmm9
426 movups (%rcx,%rax,1),%xmm0
427 addq $16,%rax
# Enter the loop in the middle: xmm2 and xmm3 already had their round with
# xmm1, xmm4..xmm9 still need it.
428 jmp .Lenc_loop8_inner
429.align 16
430.Lenc_loop8:
# aesenc with xmm1 on xmm2, xmm3.
431.byte 102,15,56,220,209
432.byte 102,15,56,220,217
433.Lenc_loop8_inner:
# aesenc with xmm1 on xmm4..xmm9.
434.byte 102,15,56,220,225
435.byte 102,15,56,220,233
436.byte 102,15,56,220,241
437.byte 102,15,56,220,249
438.byte 102,68,15,56,220,193
439.byte 102,68,15,56,220,201
# NOTE: .Lenc_loop8_enter is not branched to inside this helper; it is a
# call target used by aes_hw_ctr32_encrypt_blocks later in this file, so
# the code from here to the return must keep its current register contract.
440.Lenc_loop8_enter:
# Load the next round key into xmm1; addq sets ZF for the jnz below.
441 movups (%rcx,%rax,1),%xmm1
442 addq $32,%rax
# aesenc with xmm0 on xmm2..xmm9.
443.byte 102,15,56,220,208
444.byte 102,15,56,220,216
445.byte 102,15,56,220,224
446.byte 102,15,56,220,232
447.byte 102,15,56,220,240
448.byte 102,15,56,220,248
449.byte 102,68,15,56,220,192
450.byte 102,68,15,56,220,200
# Load the following round key into xmm0 (movups leaves the flags alone).
451 movups -16(%rcx,%rax,1),%xmm0
452 jnz .Lenc_loop8
453
# Tail: one more aesenc with xmm1 on all eight blocks ...
454.byte 102,15,56,220,209
455.byte 102,15,56,220,217
456.byte 102,15,56,220,225
457.byte 102,15,56,220,233
458.byte 102,15,56,220,241
459.byte 102,15,56,220,249
460.byte 102,68,15,56,220,193
461.byte 102,68,15,56,220,201
# ... then aesenclast with xmm0 on all eight blocks.
462.byte 102,15,56,221,208
463.byte 102,15,56,221,216
464.byte 102,15,56,221,224
465.byte 102,15,56,221,232
466.byte 102,15,56,221,240
467.byte 102,15,56,221,248
468.byte 102,68,15,56,221,192
469.byte 102,68,15,56,221,200
# The bytes below encode: rep ret.
470 .byte 0xf3,0xc3
471.cfi_endproc
472.size _aesni_encrypt8,.-_aesni_encrypt8
# _aesni_decrypt8: file-local helper that runs eight blocks through the AES
# decryption rounds in parallel. It uses a private register contract, not
# the C calling convention:
#   in:  %rcx = key schedule address
#        %eax = loop count (the caller in this file loads it from offset 240
#               of the key schedule)
#        %xmm2-%xmm9 = input blocks
#   out: %xmm2-%xmm9 = results, in place
#   modifies: %rax, %rcx, %xmm0, %xmm1, flags
# The setup interleaves whitening with the first round and then jumps into
# the middle of the loop. The loop consumes two round keys per pass and
# exits when rax reaches exactly zero, so it falls out as intended only for
# an odd count of 3 or more (derived from the index arithmetic below).
# Encodings with the extra byte 68 (a REX prefix) address xmm8 and xmm9.
473.type _aesni_decrypt8,@function
474.align 16
475_aesni_decrypt8:
476.cfi_startproc
# xmm0 = round key 0; eax = count * 16; xmm1 = round key 1.
477 movups (%rcx),%xmm0
478 shll $4,%eax
479 movups 16(%rcx),%xmm1
# Whiten xmm2..xmm6 with round key 0.
480 xorps %xmm0,%xmm2
481 xorps %xmm0,%xmm3
482 pxor %xmm0,%xmm4
483 pxor %xmm0,%xmm5
484 pxor %xmm0,%xmm6
# rcx = key + 32 + 16*count (just past the last round key used);
# rax = -16*count.
485 leaq 32(%rcx,%rax,1),%rcx
486 negq %rax
# aesdec %xmm1,%xmm2
487.byte 102,15,56,222,209
# Whiten xmm7 and xmm8.
488 pxor %xmm0,%xmm7
489 pxor %xmm0,%xmm8
# aesdec %xmm1,%xmm3
490.byte 102,15,56,222,217
# Whiten xmm9, then xmm0 = round key 2 (rcx + rax is key + 32) and
# rax = 16 - 16*count.
491 pxor %xmm0,%xmm9
492 movups (%rcx,%rax,1),%xmm0
493 addq $16,%rax
# Enter the loop in the middle: xmm2 and xmm3 already had their round with
# xmm1, xmm4..xmm9 still need it.
494 jmp .Ldec_loop8_inner
495.align 16
496.Ldec_loop8:
# aesdec with xmm1 on xmm2, xmm3.
497.byte 102,15,56,222,209
498.byte 102,15,56,222,217
499.Ldec_loop8_inner:
# aesdec with xmm1 on xmm4..xmm9.
500.byte 102,15,56,222,225
501.byte 102,15,56,222,233
502.byte 102,15,56,222,241
503.byte 102,15,56,222,249
504.byte 102,68,15,56,222,193
505.byte 102,68,15,56,222,201
# .Ldec_loop8_enter is not branched to inside this helper
# (NOTE(review): presumably an entry point for code outside this view).
506.Ldec_loop8_enter:
# Load the next round key into xmm1; addq sets ZF for the jnz below.
507 movups (%rcx,%rax,1),%xmm1
508 addq $32,%rax
# aesdec with xmm0 on xmm2..xmm9.
509.byte 102,15,56,222,208
510.byte 102,15,56,222,216
511.byte 102,15,56,222,224
512.byte 102,15,56,222,232
513.byte 102,15,56,222,240
514.byte 102,15,56,222,248
515.byte 102,68,15,56,222,192
516.byte 102,68,15,56,222,200
# Load the following round key into xmm0 (movups leaves the flags alone).
517 movups -16(%rcx,%rax,1),%xmm0
518 jnz .Ldec_loop8
519
# Tail: one more aesdec with xmm1 on all eight blocks ...
520.byte 102,15,56,222,209
521.byte 102,15,56,222,217
522.byte 102,15,56,222,225
523.byte 102,15,56,222,233
524.byte 102,15,56,222,241
525.byte 102,15,56,222,249
526.byte 102,68,15,56,222,193
527.byte 102,68,15,56,222,201
# ... then aesdeclast with xmm0 on all eight blocks.
528.byte 102,15,56,223,208
529.byte 102,15,56,223,216
530.byte 102,15,56,223,224
531.byte 102,15,56,223,232
532.byte 102,15,56,223,240
533.byte 102,15,56,223,248
534.byte 102,68,15,56,223,192
535.byte 102,68,15,56,223,200
# The bytes below encode: rep ret.
536 .byte 0xf3,0xc3
537.cfi_endproc
538.size _aesni_decrypt8,.-_aesni_decrypt8
# aes_hw_ecb_encrypt: ECB-mode bulk encryption or decryption with AES-NI.
# Registers read on entry (System V AMD64 argument order):
#   %rdi = input address
#   %rsi = output address
#   %rdx = length in bytes; it is rounded down to a multiple of 16 and the
#          routine returns at once when nothing remains
#   %rcx = key schedule address (32-bit loop count at offset 240)
#   %r8d = direction: nonzero selects the encrypt path, zero the decrypt path
# Presumably declared in C as
#   void aes_hw_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t length,
#                           const AES_KEY *key, int enc);
# TODO confirm against the header.
# Strategy: 128-byte groups (8 blocks) go through _aesni_encrypt8 or
# _aesni_decrypt8; the remaining 1 to 7 blocks are dispatched by size. One
# block is handled inline; 2, 3, 4 and 6 blocks use the matching helper; the
# 5- and 7-block cases run the 6- and 8-block helpers with a zeroed spare
# register whose result is never stored.
# r11 and r10d keep copies of the key address and loop count because the
# helpers modify rcx and rax.
# The decrypt paths zero each data register after storing it; the encrypt
# paths do not. Both paths zero xmm0 and xmm1 at .Lecb_ret.
# Modifies rax, rcx, rdx, rsi, rdi, r10, r11, flags and xmm0-xmm9.
539.globl aes_hw_ecb_encrypt
540.hidden aes_hw_ecb_encrypt
541.type aes_hw_ecb_encrypt,@function
542.align 16
543aes_hw_ecb_encrypt:
544.cfi_startproc
# Round the length down to whole blocks; nothing to do when that leaves 0.
545 andq $-16,%rdx
546 jz .Lecb_ret
547
# eax = loop count, xmm0 = round key 0; save key address and count in
# r11 / r10d, then pick the direction from r8d.
548 movl 240(%rcx),%eax
549 movups (%rcx),%xmm0
550 movq %rcx,%r11
551 movl %eax,%r10d
552 testl %r8d,%r8d
553 jz .Lecb_decrypt
554
# ---- Encrypt path ----
# Fewer than 8 blocks: go straight to the tail dispatcher.
555 cmpq $0x80,%rdx
556 jb .Lecb_enc_tail
557
# Load the first eight blocks, advance the input pointer and account for
# 128 bytes, then enter the loop at the helper call.
558 movdqu (%rdi),%xmm2
559 movdqu 16(%rdi),%xmm3
560 movdqu 32(%rdi),%xmm4
561 movdqu 48(%rdi),%xmm5
562 movdqu 64(%rdi),%xmm6
563 movdqu 80(%rdi),%xmm7
564 movdqu 96(%rdi),%xmm8
565 movdqu 112(%rdi),%xmm9
566 leaq 128(%rdi),%rdi
567 subq $0x80,%rdx
568 jmp .Lecb_enc_loop8_enter
569.align 16
# Loop head: store the eight results of the previous pass interleaved with
# loading the next eight inputs, and restore rcx / eax from r11 / r10d for
# the helper.
570.Lecb_enc_loop8:
571 movups %xmm2,(%rsi)
572 movq %r11,%rcx
573 movdqu (%rdi),%xmm2
574 movl %r10d,%eax
575 movups %xmm3,16(%rsi)
576 movdqu 16(%rdi),%xmm3
577 movups %xmm4,32(%rsi)
578 movdqu 32(%rdi),%xmm4
579 movups %xmm5,48(%rsi)
580 movdqu 48(%rdi),%xmm5
581 movups %xmm6,64(%rsi)
582 movdqu 64(%rdi),%xmm6
583 movups %xmm7,80(%rsi)
584 movdqu 80(%rdi),%xmm7
585 movups %xmm8,96(%rsi)
586 movdqu 96(%rdi),%xmm8
587 movups %xmm9,112(%rsi)
588 leaq 128(%rsi),%rsi
589 movdqu 112(%rdi),%xmm9
590 leaq 128(%rdi),%rdi
591.Lecb_enc_loop8_enter:
592
593 call _aesni_encrypt8
594
# The group just processed was already subtracted from rdx. Subtract 128
# more and loop while that does not borrow, i.e. while at least eight more
# blocks remain.
595 subq $0x80,%rdx
596 jnc .Lecb_enc_loop8
597
# Store the last full group, restore rcx / eax for the tail, undo the extra
# subtraction, and finish when no bytes are left.
598 movups %xmm2,(%rsi)
599 movq %r11,%rcx
600 movups %xmm3,16(%rsi)
601 movl %r10d,%eax
602 movups %xmm4,32(%rsi)
603 movups %xmm5,48(%rsi)
604 movups %xmm6,64(%rsi)
605 movups %xmm7,80(%rsi)
606 movups %xmm8,96(%rsi)
607 movups %xmm9,112(%rsi)
608 leaq 128(%rsi),%rsi
609 addq $0x80,%rdx
610 jz .Lecb_ret
611
# Tail dispatcher: rdx is 16..112 here. Each cmp feeds two branches, jb for
# the smaller block count and je for the exact match; the movups between
# them does not change the flags. Input blocks are loaded only as far as
# rdx says they exist.
612.Lecb_enc_tail:
613 movups (%rdi),%xmm2
614 cmpq $0x20,%rdx
615 jb .Lecb_enc_one
616 movups 16(%rdi),%xmm3
617 je .Lecb_enc_two
618 movups 32(%rdi),%xmm4
619 cmpq $0x40,%rdx
620 jb .Lecb_enc_three
621 movups 48(%rdi),%xmm5
622 je .Lecb_enc_four
623 movups 64(%rdi),%xmm6
624 cmpq $0x60,%rdx
625 jb .Lecb_enc_five
626 movups 80(%rdi),%xmm7
627 je .Lecb_enc_six
# Seven blocks: load the seventh, zero xmm9 as a dummy eighth input for the
# 8-block helper, and store only seven results.
628 movdqu 96(%rdi),%xmm8
629 xorps %xmm9,%xmm9
630 call _aesni_encrypt8
631 movups %xmm2,(%rsi)
632 movups %xmm3,16(%rsi)
633 movups %xmm4,32(%rsi)
634 movups %xmm5,48(%rsi)
635 movups %xmm6,64(%rsi)
636 movups %xmm7,80(%rsi)
637 movups %xmm8,96(%rsi)
638 jmp .Lecb_ret
639.align 16
# One block, handled inline: whiten with round key 0, run eax rounds of
# aesenc (first .byte line in the loop), then aesenclast (the .byte line
# after the loop).
640.Lecb_enc_one:
641 movups (%rcx),%xmm0
642 movups 16(%rcx),%xmm1
643 leaq 32(%rcx),%rcx
644 xorps %xmm0,%xmm2
645.Loop_enc1_3:
646.byte 102,15,56,220,209
647 decl %eax
648 movups (%rcx),%xmm1
649 leaq 16(%rcx),%rcx
650 jnz .Loop_enc1_3
651.byte 102,15,56,221,209
652 movups %xmm2,(%rsi)
653 jmp .Lecb_ret
654.align 16
# Two blocks.
655.Lecb_enc_two:
656 call _aesni_encrypt2
657 movups %xmm2,(%rsi)
658 movups %xmm3,16(%rsi)
659 jmp .Lecb_ret
660.align 16
# Three blocks.
661.Lecb_enc_three:
662 call _aesni_encrypt3
663 movups %xmm2,(%rsi)
664 movups %xmm3,16(%rsi)
665 movups %xmm4,32(%rsi)
666 jmp .Lecb_ret
667.align 16
# Four blocks.
668.Lecb_enc_four:
669 call _aesni_encrypt4
670 movups %xmm2,(%rsi)
671 movups %xmm3,16(%rsi)
672 movups %xmm4,32(%rsi)
673 movups %xmm5,48(%rsi)
674 jmp .Lecb_ret
675.align 16
# Five blocks: zero xmm7 as a dummy sixth input for the 6-block helper and
# store only five results.
676.Lecb_enc_five:
677 xorps %xmm7,%xmm7
678 call _aesni_encrypt6
679 movups %xmm2,(%rsi)
680 movups %xmm3,16(%rsi)
681 movups %xmm4,32(%rsi)
682 movups %xmm5,48(%rsi)
683 movups %xmm6,64(%rsi)
684 jmp .Lecb_ret
685.align 16
# Six blocks.
686.Lecb_enc_six:
687 call _aesni_encrypt6
688 movups %xmm2,(%rsi)
689 movups %xmm3,16(%rsi)
690 movups %xmm4,32(%rsi)
691 movups %xmm5,48(%rsi)
692 movups %xmm6,64(%rsi)
693 movups %xmm7,80(%rsi)
694 jmp .Lecb_ret
695
696.align 16
# ---- Decrypt path ----
# Same structure as the encrypt path, using the _aesni_decrypt helpers.
# After each final store the data register is zeroed with pxor (presumably
# so decrypted data does not stay in registers).
697.Lecb_decrypt:
# Fewer than 8 blocks: go straight to the tail dispatcher.
698 cmpq $0x80,%rdx
699 jb .Lecb_dec_tail
700
# Load the first eight blocks, advance the input pointer and account for
# 128 bytes, then enter the loop at the helper call.
701 movdqu (%rdi),%xmm2
702 movdqu 16(%rdi),%xmm3
703 movdqu 32(%rdi),%xmm4
704 movdqu 48(%rdi),%xmm5
705 movdqu 64(%rdi),%xmm6
706 movdqu 80(%rdi),%xmm7
707 movdqu 96(%rdi),%xmm8
708 movdqu 112(%rdi),%xmm9
709 leaq 128(%rdi),%rdi
710 subq $0x80,%rdx
711 jmp .Lecb_dec_loop8_enter
712.align 16
# Loop head: store the eight results of the previous pass interleaved with
# loading the next eight inputs, and restore rcx / eax from r11 / r10d for
# the helper.
713.Lecb_dec_loop8:
714 movups %xmm2,(%rsi)
715 movq %r11,%rcx
716 movdqu (%rdi),%xmm2
717 movl %r10d,%eax
718 movups %xmm3,16(%rsi)
719 movdqu 16(%rdi),%xmm3
720 movups %xmm4,32(%rsi)
721 movdqu 32(%rdi),%xmm4
722 movups %xmm5,48(%rsi)
723 movdqu 48(%rdi),%xmm5
724 movups %xmm6,64(%rsi)
725 movdqu 64(%rdi),%xmm6
726 movups %xmm7,80(%rsi)
727 movdqu 80(%rdi),%xmm7
728 movups %xmm8,96(%rsi)
729 movdqu 96(%rdi),%xmm8
730 movups %xmm9,112(%rsi)
731 leaq 128(%rsi),%rsi
732 movdqu 112(%rdi),%xmm9
733 leaq 128(%rdi),%rdi
734.Lecb_dec_loop8_enter:
735
736 call _aesni_decrypt8
737
# Reload round key 0 from the saved key address into xmm0, replacing the
# last round key the helper left there. NOTE(review): the helper reloads
# xmm0 itself on entry, so the reason for this load is not evident here.
738 movups (%r11),%xmm0
# The group just processed was already subtracted from rdx. Subtract 128
# more and loop while that does not borrow, i.e. while at least eight more
# blocks remain.
739 subq $0x80,%rdx
740 jnc .Lecb_dec_loop8
741
# Store the last full group, zeroing each register after its store; restore
# rcx / eax for the tail, undo the extra subtraction, and finish when no
# bytes are left.
742 movups %xmm2,(%rsi)
743 pxor %xmm2,%xmm2
744 movq %r11,%rcx
745 movups %xmm3,16(%rsi)
746 pxor %xmm3,%xmm3
747 movl %r10d,%eax
748 movups %xmm4,32(%rsi)
749 pxor %xmm4,%xmm4
750 movups %xmm5,48(%rsi)
751 pxor %xmm5,%xmm5
752 movups %xmm6,64(%rsi)
753 pxor %xmm6,%xmm6
754 movups %xmm7,80(%rsi)
755 pxor %xmm7,%xmm7
756 movups %xmm8,96(%rsi)
757 pxor %xmm8,%xmm8
758 movups %xmm9,112(%rsi)
759 pxor %xmm9,%xmm9
760 leaq 128(%rsi),%rsi
761 addq $0x80,%rdx
762 jz .Lecb_ret
763
# Tail dispatcher: rdx is 16..112 here. Each cmp feeds two branches, jb for
# the smaller block count and je for the exact match; the movups between
# them does not change the flags. Input blocks are loaded only as far as
# rdx says they exist.
764.Lecb_dec_tail:
765 movups (%rdi),%xmm2
766 cmpq $0x20,%rdx
767 jb .Lecb_dec_one
768 movups 16(%rdi),%xmm3
769 je .Lecb_dec_two
770 movups 32(%rdi),%xmm4
771 cmpq $0x40,%rdx
772 jb .Lecb_dec_three
773 movups 48(%rdi),%xmm5
774 je .Lecb_dec_four
775 movups 64(%rdi),%xmm6
776 cmpq $0x60,%rdx
777 jb .Lecb_dec_five
778 movups 80(%rdi),%xmm7
779 je .Lecb_dec_six
# Seven blocks: load the seventh, load round key 0 into xmm0 (the helper
# loads it again on entry), zero xmm9 as a dummy eighth input for the
# 8-block helper, and store only seven results. All eight data registers
# are zeroed afterwards.
780 movups 96(%rdi),%xmm8
781 movups (%rcx),%xmm0
782 xorps %xmm9,%xmm9
783 call _aesni_decrypt8
784 movups %xmm2,(%rsi)
785 pxor %xmm2,%xmm2
786 movups %xmm3,16(%rsi)
787 pxor %xmm3,%xmm3
788 movups %xmm4,32(%rsi)
789 pxor %xmm4,%xmm4
790 movups %xmm5,48(%rsi)
791 pxor %xmm5,%xmm5
792 movups %xmm6,64(%rsi)
793 pxor %xmm6,%xmm6
794 movups %xmm7,80(%rsi)
795 pxor %xmm7,%xmm7
796 movups %xmm8,96(%rsi)
797 pxor %xmm8,%xmm8
798 pxor %xmm9,%xmm9
799 jmp .Lecb_ret
800.align 16
# One block, handled inline: whiten with round key 0, run eax rounds of
# aesdec (first .byte line in the loop), then aesdeclast (the .byte line
# after the loop).
801.Lecb_dec_one:
802 movups (%rcx),%xmm0
803 movups 16(%rcx),%xmm1
804 leaq 32(%rcx),%rcx
805 xorps %xmm0,%xmm2
806.Loop_dec1_4:
807.byte 102,15,56,222,209
808 decl %eax
809 movups (%rcx),%xmm1
810 leaq 16(%rcx),%rcx
811 jnz .Loop_dec1_4
812.byte 102,15,56,223,209
813 movups %xmm2,(%rsi)
814 pxor %xmm2,%xmm2
815 jmp .Lecb_ret
816.align 16
# Two blocks.
817.Lecb_dec_two:
818 call _aesni_decrypt2
819 movups %xmm2,(%rsi)
820 pxor %xmm2,%xmm2
821 movups %xmm3,16(%rsi)
822 pxor %xmm3,%xmm3
823 jmp .Lecb_ret
824.align 16
# Three blocks.
825.Lecb_dec_three:
826 call _aesni_decrypt3
827 movups %xmm2,(%rsi)
828 pxor %xmm2,%xmm2
829 movups %xmm3,16(%rsi)
830 pxor %xmm3,%xmm3
831 movups %xmm4,32(%rsi)
832 pxor %xmm4,%xmm4
833 jmp .Lecb_ret
834.align 16
# Four blocks.
835.Lecb_dec_four:
836 call _aesni_decrypt4
837 movups %xmm2,(%rsi)
838 pxor %xmm2,%xmm2
839 movups %xmm3,16(%rsi)
840 pxor %xmm3,%xmm3
841 movups %xmm4,32(%rsi)
842 pxor %xmm4,%xmm4
843 movups %xmm5,48(%rsi)
844 pxor %xmm5,%xmm5
845 jmp .Lecb_ret
846.align 16
# Five blocks: zero xmm7 as a dummy sixth input for the 6-block helper,
# store only five results, and zero xmm7 again afterwards.
847.Lecb_dec_five:
848 xorps %xmm7,%xmm7
849 call _aesni_decrypt6
850 movups %xmm2,(%rsi)
851 pxor %xmm2,%xmm2
852 movups %xmm3,16(%rsi)
853 pxor %xmm3,%xmm3
854 movups %xmm4,32(%rsi)
855 pxor %xmm4,%xmm4
856 movups %xmm5,48(%rsi)
857 pxor %xmm5,%xmm5
858 movups %xmm6,64(%rsi)
859 pxor %xmm6,%xmm6
860 pxor %xmm7,%xmm7
861 jmp .Lecb_ret
862.align 16
# Six blocks; this case falls through into .Lecb_ret without a jump.
863.Lecb_dec_six:
864 call _aesni_decrypt6
865 movups %xmm2,(%rsi)
866 pxor %xmm2,%xmm2
867 movups %xmm3,16(%rsi)
868 pxor %xmm3,%xmm3
869 movups %xmm4,32(%rsi)
870 pxor %xmm4,%xmm4
871 movups %xmm5,48(%rsi)
872 pxor %xmm5,%xmm5
873 movups %xmm6,64(%rsi)
874 pxor %xmm6,%xmm6
875 movups %xmm7,80(%rsi)
876 pxor %xmm7,%xmm7
877
# Common exit: zero the two round-key registers, then rep ret.
878.Lecb_ret:
879 xorps %xmm0,%xmm0
880 pxor %xmm1,%xmm1
881 .byte 0xf3,0xc3
882.cfi_endproc
883.size aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt
884.globl aes_hw_ctr32_encrypt_blocks
885.hidden aes_hw_ctr32_encrypt_blocks
886.type aes_hw_ctr32_encrypt_blocks,@function
887.align 16
888aes_hw_ctr32_encrypt_blocks:
889.cfi_startproc
890#ifndef NDEBUG
891#ifndef BORINGSSL_FIPS
892 movb $1,BORINGSSL_function_hit(%rip)
893#endif
894#endif
895 cmpq $1,%rdx
896 jne .Lctr32_bulk
897
898
899
900 movups (%r8),%xmm2
901 movups (%rdi),%xmm3
902 movl 240(%rcx),%edx
903 movups (%rcx),%xmm0
904 movups 16(%rcx),%xmm1
905 leaq 32(%rcx),%rcx
906 xorps %xmm0,%xmm2
907.Loop_enc1_5:
908.byte 102,15,56,220,209
909 decl %edx
910 movups (%rcx),%xmm1
911 leaq 16(%rcx),%rcx
912 jnz .Loop_enc1_5
913.byte 102,15,56,221,209
914 pxor %xmm0,%xmm0
915 pxor %xmm1,%xmm1
916 xorps %xmm3,%xmm2
917 pxor %xmm3,%xmm3
918 movups %xmm2,(%rsi)
919 xorps %xmm2,%xmm2
920 jmp .Lctr32_epilogue
921
922.align 16
923.Lctr32_bulk:
924 leaq (%rsp),%r11
925.cfi_def_cfa_register %r11
926 pushq %rbp
927.cfi_offset %rbp,-16
928 subq $128,%rsp
929 andq $-16,%rsp
930
931
932
933
934 movdqu (%r8),%xmm2
935 movdqu (%rcx),%xmm0
936 movl 12(%r8),%r8d
937 pxor %xmm0,%xmm2
938 movl 12(%rcx),%ebp
939 movdqa %xmm2,0(%rsp)
940 bswapl %r8d
941 movdqa %xmm2,%xmm3
942 movdqa %xmm2,%xmm4
943 movdqa %xmm2,%xmm5
944 movdqa %xmm2,64(%rsp)
945 movdqa %xmm2,80(%rsp)
946 movdqa %xmm2,96(%rsp)
947 movq %rdx,%r10
948 movdqa %xmm2,112(%rsp)
949
950 leaq 1(%r8),%rax
951 leaq 2(%r8),%rdx
952 bswapl %eax
953 bswapl %edx
954 xorl %ebp,%eax
955 xorl %ebp,%edx
956.byte 102,15,58,34,216,3
957 leaq 3(%r8),%rax
958 movdqa %xmm3,16(%rsp)
959.byte 102,15,58,34,226,3
960 bswapl %eax
961 movq %r10,%rdx
962 leaq 4(%r8),%r10
963 movdqa %xmm4,32(%rsp)
964 xorl %ebp,%eax
965 bswapl %r10d
966.byte 102,15,58,34,232,3
967 xorl %ebp,%r10d
968 movdqa %xmm5,48(%rsp)
969 leaq 5(%r8),%r9
970 movl %r10d,64+12(%rsp)
971 bswapl %r9d
972 leaq 6(%r8),%r10
973 movl 240(%rcx),%eax
974 xorl %ebp,%r9d
975 bswapl %r10d
976 movl %r9d,80+12(%rsp)
977 xorl %ebp,%r10d
978 leaq 7(%r8),%r9
979 movl %r10d,96+12(%rsp)
980 bswapl %r9d
981 leaq OPENSSL_ia32cap_P(%rip),%r10
982 movl 4(%r10),%r10d
983 xorl %ebp,%r9d
984 andl $71303168,%r10d
985 movl %r9d,112+12(%rsp)
986
987 movups 16(%rcx),%xmm1
988
989 movdqa 64(%rsp),%xmm6
990 movdqa 80(%rsp),%xmm7
991
992 cmpq $8,%rdx
993 jb .Lctr32_tail
994
995 subq $6,%rdx
996 cmpl $4194304,%r10d
997 je .Lctr32_6x
998
999 leaq 128(%rcx),%rcx
1000 subq $2,%rdx
1001 jmp .Lctr32_loop8
1002
1003.align 16
1004.Lctr32_6x:
1005 shll $4,%eax
1006 movl $48,%r10d
1007 bswapl %ebp
1008 leaq 32(%rcx,%rax,1),%rcx
1009 subq %rax,%r10
1010 jmp .Lctr32_loop6
1011
1012.align 16
1013.Lctr32_loop6:
1014 addl $6,%r8d
1015 movups -48(%rcx,%r10,1),%xmm0
1016.byte 102,15,56,220,209
1017 movl %r8d,%eax
1018 xorl %ebp,%eax
1019.byte 102,15,56,220,217
1020.byte 0x0f,0x38,0xf1,0x44,0x24,12
1021 leal 1(%r8),%eax
1022.byte 102,15,56,220,225
1023 xorl %ebp,%eax
1024.byte 0x0f,0x38,0xf1,0x44,0x24,28
1025.byte 102,15,56,220,233
1026 leal 2(%r8),%eax
1027 xorl %ebp,%eax
1028.byte 102,15,56,220,241
1029.byte 0x0f,0x38,0xf1,0x44,0x24,44
1030 leal 3(%r8),%eax
1031.byte 102,15,56,220,249
1032 movups -32(%rcx,%r10,1),%xmm1
1033 xorl %ebp,%eax
1034
1035.byte 102,15,56,220,208
1036.byte 0x0f,0x38,0xf1,0x44,0x24,60
1037 leal 4(%r8),%eax
1038.byte 102,15,56,220,216
1039 xorl %ebp,%eax
1040.byte 0x0f,0x38,0xf1,0x44,0x24,76
1041.byte 102,15,56,220,224
1042 leal 5(%r8),%eax
1043 xorl %ebp,%eax
1044.byte 102,15,56,220,232
1045.byte 0x0f,0x38,0xf1,0x44,0x24,92
1046 movq %r10,%rax
1047.byte 102,15,56,220,240
1048.byte 102,15,56,220,248
1049 movups -16(%rcx,%r10,1),%xmm0
1050
1051 call .Lenc_loop6
1052
1053 movdqu (%rdi),%xmm8
1054 movdqu 16(%rdi),%xmm9
1055 movdqu 32(%rdi),%xmm10
1056 movdqu 48(%rdi),%xmm11
1057 movdqu 64(%rdi),%xmm12
1058 movdqu 80(%rdi),%xmm13
1059 leaq 96(%rdi),%rdi
1060 movups -64(%rcx,%r10,1),%xmm1
1061 pxor %xmm2,%xmm8
1062 movaps 0(%rsp),%xmm2
1063 pxor %xmm3,%xmm9
1064 movaps 16(%rsp),%xmm3
1065 pxor %xmm4,%xmm10
1066 movaps 32(%rsp),%xmm4
1067 pxor %xmm5,%xmm11
1068 movaps 48(%rsp),%xmm5
1069 pxor %xmm6,%xmm12
1070 movaps 64(%rsp),%xmm6
1071 pxor %xmm7,%xmm13
1072 movaps 80(%rsp),%xmm7
1073 movdqu %xmm8,(%rsi)
1074 movdqu %xmm9,16(%rsi)
1075 movdqu %xmm10,32(%rsi)
1076 movdqu %xmm11,48(%rsi)
1077 movdqu %xmm12,64(%rsi)
1078 movdqu %xmm13,80(%rsi)
1079 leaq 96(%rsi),%rsi
1080
1081 subq $6,%rdx
1082 jnc .Lctr32_loop6
1083
1084 addq $6,%rdx
1085 jz .Lctr32_done
1086
1087 leal -48(%r10),%eax
1088 leaq -80(%rcx,%r10,1),%rcx
1089 negl %eax
1090 shrl $4,%eax
1091 jmp .Lctr32_tail
1092
1093.align 32
1094.Lctr32_loop8:
1095 addl $8,%r8d
1096 movdqa 96(%rsp),%xmm8
1097.byte 102,15,56,220,209
1098 movl %r8d,%r9d
1099 movdqa 112(%rsp),%xmm9
1100.byte 102,15,56,220,217
1101 bswapl %r9d
1102 movups 32-128(%rcx),%xmm0
1103.byte 102,15,56,220,225
1104 xorl %ebp,%r9d
1105 nop
1106.byte 102,15,56,220,233
1107 movl %r9d,0+12(%rsp)
1108 leaq 1(%r8),%r9
1109.byte 102,15,56,220,241
1110.byte 102,15,56,220,249
1111.byte 102,68,15,56,220,193
1112.byte 102,68,15,56,220,201
1113 movups 48-128(%rcx),%xmm1
1114 bswapl %r9d
1115.byte 102,15,56,220,208
1116.byte 102,15,56,220,216
1117 xorl %ebp,%r9d
1118.byte 0x66,0x90
1119.byte 102,15,56,220,224
1120.byte 102,15,56,220,232
1121 movl %r9d,16+12(%rsp)
1122 leaq 2(%r8),%r9
1123.byte 102,15,56,220,240
1124.byte 102,15,56,220,248
1125.byte 102,68,15,56,220,192
1126.byte 102,68,15,56,220,200
1127 movups 64-128(%rcx),%xmm0
1128 bswapl %r9d
1129.byte 102,15,56,220,209
1130.byte 102,15,56,220,217
1131 xorl %ebp,%r9d
1132.byte 0x66,0x90
1133.byte 102,15,56,220,225
1134.byte 102,15,56,220,233
1135 movl %r9d,32+12(%rsp)
1136 leaq 3(%r8),%r9
1137.byte 102,15,56,220,241
1138.byte 102,15,56,220,249
1139.byte 102,68,15,56,220,193
1140.byte 102,68,15,56,220,201
1141 movups 80-128(%rcx),%xmm1
1142 bswapl %r9d
1143.byte 102,15,56,220,208
1144.byte 102,15,56,220,216
1145 xorl %ebp,%r9d
1146.byte 0x66,0x90
1147.byte 102,15,56,220,224
1148.byte 102,15,56,220,232
1149 movl %r9d,48+12(%rsp)
1150 leaq 4(%r8),%r9
1151.byte 102,15,56,220,240
1152.byte 102,15,56,220,248
1153.byte 102,68,15,56,220,192
1154.byte 102,68,15,56,220,200
1155 movups 96-128(%rcx),%xmm0
1156 bswapl %r9d
1157.byte 102,15,56,220,209
1158.byte 102,15,56,220,217
1159 xorl %ebp,%r9d
1160.byte 0x66,0x90
1161.byte 102,15,56,220,225
1162.byte 102,15,56,220,233
1163 movl %r9d,64+12(%rsp)
1164 leaq 5(%r8),%r9
1165.byte 102,15,56,220,241
1166.byte 102,15,56,220,249
1167.byte 102,68,15,56,220,193
1168.byte 102,68,15,56,220,201
1169 movups 112-128(%rcx),%xmm1
1170 bswapl %r9d
1171.byte 102,15,56,220,208
1172.byte 102,15,56,220,216
1173 xorl %ebp,%r9d
1174.byte 0x66,0x90
1175.byte 102,15,56,220,224
1176.byte 102,15,56,220,232
1177 movl %r9d,80+12(%rsp)
1178 leaq 6(%r8),%r9
1179.byte 102,15,56,220,240
1180.byte 102,15,56,220,248
1181.byte 102,68,15,56,220,192
1182.byte 102,68,15,56,220,200
1183 movups 128-128(%rcx),%xmm0
1184 bswapl %r9d
1185.byte 102,15,56,220,209
1186.byte 102,15,56,220,217
1187 xorl %ebp,%r9d
1188.byte 0x66,0x90
1189.byte 102,15,56,220,225
1190.byte 102,15,56,220,233
1191 movl %r9d,96+12(%rsp)
1192 leaq 7(%r8),%r9
1193.byte 102,15,56,220,241
1194.byte 102,15,56,220,249
1195.byte 102,68,15,56,220,193
1196.byte 102,68,15,56,220,201
1197 movups 144-128(%rcx),%xmm1
1198 bswapl %r9d
1199.byte 102,15,56,220,208
1200.byte 102,15,56,220,216
1201.byte 102,15,56,220,224
1202 xorl %ebp,%r9d
1203 movdqu 0(%rdi),%xmm10
1204.byte 102,15,56,220,232
1205 movl %r9d,112+12(%rsp)
1206 cmpl $11,%eax
1207.byte 102,15,56,220,240
1208.byte 102,15,56,220,248
1209.byte 102,68,15,56,220,192
1210.byte 102,68,15,56,220,200
1211 movups 160-128(%rcx),%xmm0
1212
1213 jb .Lctr32_enc_done
1214
1215.byte 102,15,56,220,209
1216.byte 102,15,56,220,217
1217.byte 102,15,56,220,225
1218.byte 102,15,56,220,233
1219.byte 102,15,56,220,241
1220.byte 102,15,56,220,249
1221.byte 102,68,15,56,220,193
1222.byte 102,68,15,56,220,201
1223 movups 176-128(%rcx),%xmm1
1224
1225.byte 102,15,56,220,208
1226.byte 102,15,56,220,216
1227.byte 102,15,56,220,224
1228.byte 102,15,56,220,232
1229.byte 102,15,56,220,240
1230.byte 102,15,56,220,248
1231.byte 102,68,15,56,220,192
1232.byte 102,68,15,56,220,200
1233 movups 192-128(%rcx),%xmm0
1234 je .Lctr32_enc_done
1235
1236.byte 102,15,56,220,209
1237.byte 102,15,56,220,217
1238.byte 102,15,56,220,225
1239.byte 102,15,56,220,233
1240.byte 102,15,56,220,241
1241.byte 102,15,56,220,249
1242.byte 102,68,15,56,220,193
1243.byte 102,68,15,56,220,201
1244 movups 208-128(%rcx),%xmm1
1245
1246.byte 102,15,56,220,208
1247.byte 102,15,56,220,216
1248.byte 102,15,56,220,224
1249.byte 102,15,56,220,232
1250.byte 102,15,56,220,240
1251.byte 102,15,56,220,248
1252.byte 102,68,15,56,220,192
1253.byte 102,68,15,56,220,200
1254 movups 224-128(%rcx),%xmm0
1255 jmp .Lctr32_enc_done
1256
1257.align 16
1258.Lctr32_enc_done:
1259 movdqu 16(%rdi),%xmm11
1260 pxor %xmm0,%xmm10
1261 movdqu 32(%rdi),%xmm12
1262 pxor %xmm0,%xmm11
1263 movdqu 48(%rdi),%xmm13
1264 pxor %xmm0,%xmm12
1265 movdqu 64(%rdi),%xmm14
1266 pxor %xmm0,%xmm13
1267 movdqu 80(%rdi),%xmm15
1268 pxor %xmm0,%xmm14
1269 pxor %xmm0,%xmm15
1270.byte 102,15,56,220,209
1271.byte 102,15,56,220,217
1272.byte 102,15,56,220,225
1273.byte 102,15,56,220,233
1274.byte 102,15,56,220,241
1275.byte 102,15,56,220,249
1276.byte 102,68,15,56,220,193
1277.byte 102,68,15,56,220,201
1278 movdqu 96(%rdi),%xmm1
1279 leaq 128(%rdi),%rdi
1280
1281.byte 102,65,15,56,221,210
1282 pxor %xmm0,%xmm1
1283 movdqu 112-128(%rdi),%xmm10
1284.byte 102,65,15,56,221,219
1285 pxor %xmm0,%xmm10
1286 movdqa 0(%rsp),%xmm11
1287.byte 102,65,15,56,221,228
1288.byte 102,65,15,56,221,237
1289 movdqa 16(%rsp),%xmm12
1290 movdqa 32(%rsp),%xmm13
1291.byte 102,65,15,56,221,246
1292.byte 102,65,15,56,221,255
1293 movdqa 48(%rsp),%xmm14
1294 movdqa 64(%rsp),%xmm15
1295.byte 102,68,15,56,221,193
1296 movdqa 80(%rsp),%xmm0
1297 movups 16-128(%rcx),%xmm1
1298.byte 102,69,15,56,221,202
1299
1300 movups %xmm2,(%rsi)
1301 movdqa %xmm11,%xmm2
1302 movups %xmm3,16(%rsi)
1303 movdqa %xmm12,%xmm3
1304 movups %xmm4,32(%rsi)
1305 movdqa %xmm13,%xmm4
1306 movups %xmm5,48(%rsi)
1307 movdqa %xmm14,%xmm5
1308 movups %xmm6,64(%rsi)
1309 movdqa %xmm15,%xmm6
1310 movups %xmm7,80(%rsi)
1311 movdqa %xmm0,%xmm7
1312 movups %xmm8,96(%rsi)
1313 movups %xmm9,112(%rsi)
1314 leaq 128(%rsi),%rsi
1315
1316 subq $8,%rdx
1317 jnc .Lctr32_loop8
1318
1319 addq $8,%rdx
1320 jz .Lctr32_done
1321 leaq -128(%rcx),%rcx
1322
1323.Lctr32_tail:
1324
1325
1326 leaq 16(%rcx),%rcx
1327 cmpq $4,%rdx
1328 jb .Lctr32_loop3
1329 je .Lctr32_loop4
1330
1331
1332 shll $4,%eax
1333 movdqa 96(%rsp),%xmm8
1334 pxor %xmm9,%xmm9
1335
1336 movups 16(%rcx),%xmm0
1337.byte 102,15,56,220,209
1338.byte 102,15,56,220,217
1339 leaq 32-16(%rcx,%rax,1),%rcx
1340 negq %rax
1341.byte 102,15,56,220,225
1342 addq $16,%rax
1343 movups (%rdi),%xmm10
1344.byte 102,15,56,220,233
1345.byte 102,15,56,220,241
1346 movups 16(%rdi),%xmm11
1347 movups 32(%rdi),%xmm12
1348.byte 102,15,56,220,249
1349.byte 102,68,15,56,220,193
1350
1351 call .Lenc_loop8_enter
1352
1353 movdqu 48(%rdi),%xmm13
1354 pxor %xmm10,%xmm2
1355 movdqu 64(%rdi),%xmm10
1356 pxor %xmm11,%xmm3
1357 movdqu %xmm2,(%rsi)
1358 pxor %xmm12,%xmm4
1359 movdqu %xmm3,16(%rsi)
1360 pxor %xmm13,%xmm5
1361 movdqu %xmm4,32(%rsi)
1362 pxor %xmm10,%xmm6
1363 movdqu %xmm5,48(%rsi)
1364 movdqu %xmm6,64(%rsi)
1365 cmpq $6,%rdx
1366 jb .Lctr32_done
1367
1368 movups 80(%rdi),%xmm11
1369 xorps %xmm11,%xmm7
1370 movups %xmm7,80(%rsi)
1371 je .Lctr32_done
1372
1373 movups 96(%rdi),%xmm12
1374 xorps %xmm12,%xmm8
1375 movups %xmm8,96(%rsi)
1376 jmp .Lctr32_done
1377
1378.align 32
1379.Lctr32_loop4:
1380.byte 102,15,56,220,209
1381 leaq 16(%rcx),%rcx
1382 decl %eax
1383.byte 102,15,56,220,217
1384.byte 102,15,56,220,225
1385.byte 102,15,56,220,233
1386 movups (%rcx),%xmm1
1387 jnz .Lctr32_loop4
1388.byte 102,15,56,221,209
1389.byte 102,15,56,221,217
1390 movups (%rdi),%xmm10
1391 movups 16(%rdi),%xmm11
1392.byte 102,15,56,221,225
1393.byte 102,15,56,221,233
1394 movups 32(%rdi),%xmm12
1395 movups 48(%rdi),%xmm13
1396
1397 xorps %xmm10,%xmm2
1398 movups %xmm2,(%rsi)
1399 xorps %xmm11,%xmm3
1400 movups %xmm3,16(%rsi)
1401 pxor %xmm12,%xmm4
1402 movdqu %xmm4,32(%rsi)
1403 pxor %xmm13,%xmm5
1404 movdqu %xmm5,48(%rsi)
1405 jmp .Lctr32_done
1406
1407.align 32
1408.Lctr32_loop3:
1409.byte 102,15,56,220,209
1410 leaq 16(%rcx),%rcx
1411 decl %eax
1412.byte 102,15,56,220,217
1413.byte 102,15,56,220,225
1414 movups (%rcx),%xmm1
1415 jnz .Lctr32_loop3
1416.byte 102,15,56,221,209
1417.byte 102,15,56,221,217
1418.byte 102,15,56,221,225
1419
1420 movups (%rdi),%xmm10
1421 xorps %xmm10,%xmm2
1422 movups %xmm2,(%rsi)
1423 cmpq $2,%rdx
1424 jb .Lctr32_done
1425
1426 movups 16(%rdi),%xmm11
1427 xorps %xmm11,%xmm3
1428 movups %xmm3,16(%rsi)
1429 je .Lctr32_done
1430
1431 movups 32(%rdi),%xmm12
1432 xorps %xmm12,%xmm4
1433 movups %xmm4,32(%rsi)
1434
1435.Lctr32_done:
1436 xorps %xmm0,%xmm0
1437 xorl %ebp,%ebp
1438 pxor %xmm1,%xmm1
1439 pxor %xmm2,%xmm2
1440 pxor %xmm3,%xmm3
1441 pxor %xmm4,%xmm4
1442 pxor %xmm5,%xmm5
1443 pxor %xmm6,%xmm6
1444 pxor %xmm7,%xmm7
1445 movaps %xmm0,0(%rsp)
1446 pxor %xmm8,%xmm8
1447 movaps %xmm0,16(%rsp)
1448 pxor %xmm9,%xmm9
1449 movaps %xmm0,32(%rsp)
1450 pxor %xmm10,%xmm10
1451 movaps %xmm0,48(%rsp)
1452 pxor %xmm11,%xmm11
1453 movaps %xmm0,64(%rsp)
1454 pxor %xmm12,%xmm12
1455 movaps %xmm0,80(%rsp)
1456 pxor %xmm13,%xmm13
1457 movaps %xmm0,96(%rsp)
1458 pxor %xmm14,%xmm14
1459 movaps %xmm0,112(%rsp)
1460 pxor %xmm15,%xmm15
1461 movq -8(%r11),%rbp
1462.cfi_restore %rbp
1463 leaq (%r11),%rsp
1464.cfi_def_cfa_register %rsp
1465.Lctr32_epilogue:
1466 .byte 0xf3,0xc3
1467.cfi_endproc
1468.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
# aes_hw_cbc_encrypt: CBC-mode AES encryption or decryption using AES-NI.
#
# Inputs, as consumed by the code below:
#   %rdi  pointer to the input data
#   %rsi  pointer to the output buffer
#   %rdx  length in bytes; a zero length returns immediately
#   %rcx  key schedule; a 32-bit round count is read from byte offset 240
#   %r8   pointer to the 16-byte chaining value (IV); read on entry and
#         overwritten with the new chaining value before returning
#   %r9d  direction flag: nonzero encrypts, zero decrypts
# NOTE(review): this register assignment presumably corresponds to a C
# prototype of the form (in, out, length, key, ivec, enc) under the System V
# AMD64 calling convention - confirm against the declaring header.
#
# Raw .byte/.long sequences are pre-assembled instructions:
#   102,15,56,220,M  aesenc        102,15,56,221,M  aesenclast
#   102,15,56,222,M  aesdec        102,15,56,223,M  aesdeclast
#   A 65, 68 or 69 right after the 102 is a REX prefix that selects
#   %xmm8-%xmm15 for one or both operands.
#   .long 0x9066A4F3 is rep movsb followed by a two-byte nop.
#   .long 0x9066AAF3 is rep stosb followed by a two-byte nop.
#   .byte 0xf3,0xc3 is rep ret.
#
# Helpers _aesni_decrypt2/3/4/6/8 are defined elsewhere in this file.
# NOTE(review): they presumably decrypt %xmm2 upward in place using the key at
# %rcx and the round count in %eax, clobbering both - the code below reloads
# %rcx and %eax after calling them inside a loop. Confirm at their definition.
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,@function
.align 16
aes_hw_cbc_encrypt:
.cfi_startproc
# Nothing to do for a zero length.
 testq %rdx,%rdx
 jz .Lcbc_ret

# Keep the round count in %r10d and the key pointer in %r11 so that both can
# be restored after each block. A zero %r9d selects decryption.
 movl 240(%rcx),%r10d
 movq %rcx,%r11
 testl %r9d,%r9d
 jz .Lcbc_decrypt

# Encryption. %xmm2 carries the chaining value (the IV first, then each
# ciphertext block). Inputs shorter than one block go straight to the
# padding code.
 movups (%r8),%xmm2
 movl %r10d,%eax
 cmpq $16,%rdx
 jb .Lcbc_enc_tail
# From here on %rdx is biased by -16, so a borrow out of the subq at the
# bottom of the loop means that no full block is left.
 subq $16,%rdx
 jmp .Lcbc_enc_loop
.align 16
.Lcbc_enc_loop:
 movups (%rdi),%xmm3
 leaq 16(%rdi),%rdi

# Fold round key 0 into the plaintext, then xor the result into the chaining
# value: %xmm2 = previous ^ plaintext ^ key0.
 movups (%rcx),%xmm0
 movups 16(%rcx),%xmm1
 xorps %xmm0,%xmm3
 leaq 32(%rcx),%rcx
 xorps %xmm3,%xmm2
# One aesenc %xmm1,%xmm2 per iteration, %eax iterations. The decl sets the
# flags tested by jnz; movups and leaq do not modify them.
.Loop_enc1_6:
.byte 102,15,56,220,209
 decl %eax
 movups (%rcx),%xmm1
 leaq 16(%rcx),%rcx
 jnz .Loop_enc1_6
# Final round: aesenclast %xmm1,%xmm2.
.byte 102,15,56,221,209
# Restore round count and key pointer, emit the ciphertext block.
 movl %r10d,%eax
 movq %r11,%rcx
 movups %xmm2,0(%rsi)
 leaq 16(%rsi),%rsi
 subq $16,%rdx
 jnc .Lcbc_enc_loop
# Undo the bias; a nonzero remainder is a trailing partial block.
 addq $16,%rdx
 jnz .Lcbc_enc_tail
# Done: scrub the key and data registers and write the last ciphertext block
# back as the new chaining value.
 pxor %xmm0,%xmm0
 pxor %xmm1,%xmm1
 movups %xmm2,(%r8)
 pxor %xmm2,%xmm2
 pxor %xmm3,%xmm3
 jmp .Lcbc_ret

# Partial final block of %rdx bytes: copy it into the output buffer
# (rep movsb after swapping %rdi and %rsi), zero-fill it to 16 bytes there
# (rep stosb with %al = 0), then run the loop once more with input = output
# and %rdx = 0 so that the padded block is encrypted in place.
.Lcbc_enc_tail:
 movq %rdx,%rcx
 xchgq %rdi,%rsi
.long 0x9066A4F3
 movl $16,%ecx
 subq %rdx,%rcx
 xorl %eax,%eax
.long 0x9066AAF3
 leaq -16(%rdi),%rdi
 movl %r10d,%eax
 movq %rdi,%rsi
 movq %r11,%rcx
 xorq %rdx,%rdx
 jmp .Lcbc_enc_loop

.align 16
.Lcbc_decrypt:
# Exactly one block takes the short path below; every other length goes to
# the bulk code.
 cmpq $16,%rdx
 jne .Lcbc_decrypt_bulk



# Single block: %xmm2 = ciphertext, %xmm3 = IV, %xmm4 = copy of the
# ciphertext, which becomes the new chaining value.
 movdqu (%rdi),%xmm2
 movdqu (%r8),%xmm3
 movdqa %xmm2,%xmm4
 movups (%rcx),%xmm0
 movups 16(%rcx),%xmm1
 leaq 32(%rcx),%rcx
 xorps %xmm0,%xmm2
# One aesdec %xmm1,%xmm2 per iteration, counted down in %r10d.
.Loop_dec1_7:
.byte 102,15,56,222,209
 decl %r10d
 movups (%rcx),%xmm1
 leaq 16(%rcx),%rcx
 jnz .Loop_dec1_7
# Final round: aesdeclast %xmm1,%xmm2.
.byte 102,15,56,223,209
# Scrub key registers, store the new chaining value, xor the IV into the
# decrypted block, store it and scrub the data registers.
 pxor %xmm0,%xmm0
 pxor %xmm1,%xmm1
 movdqu %xmm4,(%r8)
 xorps %xmm3,%xmm2
 pxor %xmm3,%xmm3
 movups %xmm2,(%rsi)
 pxor %xmm2,%xmm2
 jmp .Lcbc_ret
.align 16
.Lcbc_decrypt_bulk:
# Remember the incoming %rsp in %r11, save %rbp below it, and carve out a
# 16-byte aligned scratch slot at (%rsp); the slot is used by the
# partial-block tail. The epilogue restores %rbp from -8(%r11).
 leaq (%rsp),%r11
.cfi_def_cfa_register %r11
 pushq %rbp
.cfi_offset %rbp,-16
 subq $16,%rsp
 andq $-16,%rsp
# %rbp = key pointer, %xmm10 = chaining value, %eax = round count.
 movq %rcx,%rbp
 movups (%r8),%xmm10
 movl %r10d,%eax
# At most five blocks (0x50 bytes): handled entirely by the tail code.
 cmpq $0x50,%rdx
 jbe .Lcbc_dec_tail

# Load six ciphertext blocks into %xmm2-%xmm7 and keep copies of the first
# five in %xmm11-%xmm15 for the CBC xor. %xmm0 = round key 0.
 movups (%rcx),%xmm0
 movdqu 0(%rdi),%xmm2
 movdqu 16(%rdi),%xmm3
 movdqa %xmm2,%xmm11
 movdqu 32(%rdi),%xmm4
 movdqa %xmm3,%xmm12
 movdqu 48(%rdi),%xmm5
 movdqa %xmm4,%xmm13
 movdqu 64(%rdi),%xmm6
 movdqa %xmm5,%xmm14
 movdqu 80(%rdi),%xmm7
 movdqa %xmm6,%xmm15
# Fetch the second 32-bit word of OPENSSL_ia32cap_P into %r9d.
 leaq OPENSSL_ia32cap_P(%rip),%r9
 movl 4(%r9),%r9d
# Six or seven blocks in total: no loop needed.
 cmpq $0x70,%rdx
 jbe .Lcbc_dec_six_or_seven

# The mask 71303168 keeps bits 22 and 26; 4194304 is bit 22 alone. With bit
# 22 set and bit 26 clear the six-block loop is used, otherwise the
# eight-block loop.
# NOTE(review): these are presumably the CPUID.1:ECX MOVBE and XSAVE bits,
# i.e. a CPU-family heuristic - confirm against the OPENSSL_ia32cap_P layout.
 andl $71303168,%r9d
 subq $0x50,%rdx
 cmpl $4194304,%r9d
 je .Lcbc_dec_loop6_enter
# Eight-block path: %rdx is now biased by -0x70 in total and %rcx by +112, so
# round key N is addressed below as 16*N-112(%rcx).
 subq $0x20,%rdx
 leaq 112(%rcx),%rcx
 jmp .Lcbc_dec_loop8_enter
.align 16
.Lcbc_dec_loop8:
# Store the eighth plaintext block produced by the previous iteration.
 movups %xmm9,(%rsi)
 leaq 16(%rsi),%rsi
.Lcbc_dec_loop8_enter:
# Load blocks 7 and 8, xor round key 0 into all eight blocks and fetch round
# key 1 into %xmm1.
 movdqu 96(%rdi),%xmm8
 pxor %xmm0,%xmm2
 movdqu 112(%rdi),%xmm9
 pxor %xmm0,%xmm3
 movups 16-112(%rcx),%xmm1
 pxor %xmm0,%xmm4
# Branch-free choice of the look-ahead pointer. The carry produced by this
# cmpq is consumed by the adcq further down (the SSE and AES instructions in
# between leave the flags alone): %rbp becomes %rdi+128 when the biased
# %rdx is at least 0x70 and %rdi otherwise, so the look-ahead loads from
# (%rbp) at the end of the iteration re-read the current group instead of
# running past the input.
 movq $-1,%rbp
 cmpq $0x70,%rdx
 pxor %xmm0,%xmm5
 pxor %xmm0,%xmm6
 pxor %xmm0,%xmm7
 pxor %xmm0,%xmm8

# Rounds alternate between %xmm1 and %xmm0 as the key register. Each run of
# eight aesdec lines applies one round key to %xmm2-%xmm9 (the last two lines
# of a run carry a REX prefix for %xmm8 and %xmm9), and the movups that
# follows a run loads the key needed two rounds later.
# Round 1 (key in %xmm1), interleaved with the pointer computation.
.byte 102,15,56,222,209
 pxor %xmm0,%xmm9
 movups 32-112(%rcx),%xmm0
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
 adcq $0,%rbp
 andq $128,%rbp
.byte 102,68,15,56,222,201
 addq %rdi,%rbp
 movups 48-112(%rcx),%xmm1
# Round 2 (key in %xmm0).
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
 movups 64-112(%rcx),%xmm0
 nop
# Round 3 (key in %xmm1).
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
 movups 80-112(%rcx),%xmm1
 nop
# Round 4 (key in %xmm0).
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
 movups 96-112(%rcx),%xmm0
 nop
# Round 5 (key in %xmm1).
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
 movups 112-112(%rcx),%xmm1
 nop
# Round 6 (key in %xmm0).
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
 movups 128-112(%rcx),%xmm0
 nop
# Round 7 (key in %xmm1).
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
 movups 144-112(%rcx),%xmm1
# Compare the stored round count with 11 once; the jb and je below both test
# the flags of this cmpl, which the AES instructions, movups and nop in
# between do not modify.
 cmpl $11,%eax
# Round 8 (key in %xmm0).
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
 movups 160-112(%rcx),%xmm0
# Round count below 11 (the key setup code stores 9 for 128-bit keys):
# %xmm1 holds the last aesdec key and %xmm0 the final key.
 jb .Lcbc_dec_done
# Round 9 (key in %xmm1).
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
 movups 176-112(%rcx),%xmm1
 nop
# Round 10 (key in %xmm0).
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
 movups 192-112(%rcx),%xmm0
# Round count equal to 11 (stored for 192-bit keys).
 je .Lcbc_dec_done
# Round 11 (key in %xmm1).
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
 movups 208-112(%rcx),%xmm1
 nop
# Round 12 (key in %xmm0).
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
 movups 224-112(%rcx),%xmm0
 jmp .Lcbc_dec_done
.align 16
.Lcbc_dec_done:
# Last aesdec round (key in %xmm1). Meanwhile the final round key in %xmm0 is
# xored into the chaining value (%xmm10) and the saved ciphertext copies
# (%xmm11-%xmm15), so that each aesdeclast below performs the final round and
# the CBC xor in one step.
.byte 102,15,56,222,209
.byte 102,15,56,222,217
 pxor %xmm0,%xmm10
 pxor %xmm0,%xmm11
.byte 102,15,56,222,225
.byte 102,15,56,222,233
 pxor %xmm0,%xmm12
 pxor %xmm0,%xmm13
.byte 102,15,56,222,241
.byte 102,15,56,222,249
 pxor %xmm0,%xmm14
 pxor %xmm0,%xmm15
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
# Ciphertext blocks 6 and 7 are re-read from the input and combined with the
# final key for blocks 7 and 8; block 8 is kept as the next chaining value.
 movdqu 80(%rdi),%xmm1

# aesdeclast %xmm10,%xmm2
.byte 102,65,15,56,223,210
 movdqu 96(%rdi),%xmm10
 pxor %xmm0,%xmm1
# aesdeclast %xmm11,%xmm3
.byte 102,65,15,56,223,219
 pxor %xmm0,%xmm10
 movdqu 112(%rdi),%xmm0
# aesdeclast %xmm12,%xmm4
.byte 102,65,15,56,223,228
 leaq 128(%rdi),%rdi
# Look-ahead loads for the next group start here, via %rbp.
 movdqu 0(%rbp),%xmm11
# aesdeclast %xmm13,%xmm5 and aesdeclast %xmm14,%xmm6
.byte 102,65,15,56,223,237
.byte 102,65,15,56,223,246
 movdqu 16(%rbp),%xmm12
 movdqu 32(%rbp),%xmm13
# aesdeclast %xmm15,%xmm7 and aesdeclast %xmm1,%xmm8
.byte 102,65,15,56,223,255
.byte 102,68,15,56,223,193
 movdqu 48(%rbp),%xmm14
 movdqu 64(%rbp),%xmm15
# aesdeclast %xmm10,%xmm9
.byte 102,69,15,56,223,202
# %xmm10 = ciphertext block 8 (new chaining value), %xmm0 = round key 0.
 movdqa %xmm0,%xmm10
 movdqu 80(%rbp),%xmm1
 movups -112(%rcx),%xmm0

# Store plaintext blocks 1-7 and move the look-ahead ciphertext into the
# working registers. Block 8 (%xmm9) is stored at the top of the next
# iteration or right after the loop.
 movups %xmm2,(%rsi)
 movdqa %xmm11,%xmm2
 movups %xmm3,16(%rsi)
 movdqa %xmm12,%xmm3
 movups %xmm4,32(%rsi)
 movdqa %xmm13,%xmm4
 movups %xmm5,48(%rsi)
 movdqa %xmm14,%xmm5
 movups %xmm6,64(%rsi)
 movdqa %xmm15,%xmm6
 movups %xmm7,80(%rsi)
 movdqa %xmm1,%xmm7
 movups %xmm8,96(%rsi)
 leaq 112(%rsi),%rsi

 subq $0x80,%rdx
 ja .Lcbc_dec_loop8

# Loop finished: %xmm2 = pending eighth plaintext block, undo the key
# pointer bias and the length bias.
 movaps %xmm9,%xmm2
 leaq -112(%rcx),%rcx
 addq $0x70,%rdx
 jle .Lcbc_dec_clear_tail_collected
# More input remains: flush the pending block, then pick the tail variant.
 movups %xmm9,(%rsi)
 leaq 16(%rsi),%rsi
 cmpq $0x50,%rdx
 jbe .Lcbc_dec_tail

# Restore %xmm2 (overwritten above) from the saved first look-ahead block.
 movaps %xmm11,%xmm2
.Lcbc_dec_six_or_seven:
 cmpq $0x60,%rdx
 ja .Lcbc_dec_seven

# Six blocks. %xmm8 keeps ciphertext block 6, which becomes the new chaining
# value; the sixth plaintext block is handed to the common tail in %xmm2.
 movaps %xmm7,%xmm8
 call _aesni_decrypt6
 pxor %xmm10,%xmm2
 movaps %xmm8,%xmm10
 pxor %xmm11,%xmm3
 movdqu %xmm2,(%rsi)
 pxor %xmm12,%xmm4
 movdqu %xmm3,16(%rsi)
 pxor %xmm3,%xmm3
 pxor %xmm13,%xmm5
 movdqu %xmm4,32(%rsi)
 pxor %xmm4,%xmm4
 pxor %xmm14,%xmm6
 movdqu %xmm5,48(%rsi)
 pxor %xmm5,%xmm5
 pxor %xmm15,%xmm7
 movdqu %xmm6,64(%rsi)
 pxor %xmm6,%xmm6
 leaq 80(%rsi),%rsi
 movdqa %xmm7,%xmm2
 pxor %xmm7,%xmm7
 jmp .Lcbc_dec_tail_collected

.align 16
.Lcbc_dec_seven:
# Seven blocks: load the seventh, pass a zero eighth block to the
# eight-block helper, then re-read ciphertext blocks 6 and 7 from the input
# for the CBC xor and the new chaining value.
 movups 96(%rdi),%xmm8
 xorps %xmm9,%xmm9
 call _aesni_decrypt8
 movups 80(%rdi),%xmm9
 pxor %xmm10,%xmm2
 movups 96(%rdi),%xmm10
 pxor %xmm11,%xmm3
 movdqu %xmm2,(%rsi)
 pxor %xmm12,%xmm4
 movdqu %xmm3,16(%rsi)
 pxor %xmm3,%xmm3
 pxor %xmm13,%xmm5
 movdqu %xmm4,32(%rsi)
 pxor %xmm4,%xmm4
 pxor %xmm14,%xmm6
 movdqu %xmm5,48(%rsi)
 pxor %xmm5,%xmm5
 pxor %xmm15,%xmm7
 movdqu %xmm6,64(%rsi)
 pxor %xmm6,%xmm6
 pxor %xmm9,%xmm8
 movdqu %xmm7,80(%rsi)
 pxor %xmm7,%xmm7
 leaq 96(%rsi),%rsi
 movdqa %xmm8,%xmm2
 pxor %xmm8,%xmm8
 pxor %xmm9,%xmm9
 jmp .Lcbc_dec_tail_collected

.align 16
.Lcbc_dec_loop6:
# Six-block loop. Store the sixth plaintext block of the previous iteration,
# then load the next six ciphertext blocks with copies for the CBC xor.
 movups %xmm7,(%rsi)
 leaq 16(%rsi),%rsi
 movdqu 0(%rdi),%xmm2
 movdqu 16(%rdi),%xmm3
 movdqa %xmm2,%xmm11
 movdqu 32(%rdi),%xmm4
 movdqa %xmm3,%xmm12
 movdqu 48(%rdi),%xmm5
 movdqa %xmm4,%xmm13
 movdqu 64(%rdi),%xmm6
 movdqa %xmm5,%xmm14
 movdqu 80(%rdi),%xmm7
 movdqa %xmm6,%xmm15
.Lcbc_dec_loop6_enter:
 leaq 96(%rdi),%rdi
 movdqa %xmm7,%xmm8

 call _aesni_decrypt6

# CBC xor and stores, interleaved with reloading the key pointer (%rcx from
# %rbp) and the round count (%eax from %r10d) for the next call.
 pxor %xmm10,%xmm2
 movdqa %xmm8,%xmm10
 pxor %xmm11,%xmm3
 movdqu %xmm2,(%rsi)
 pxor %xmm12,%xmm4
 movdqu %xmm3,16(%rsi)
 pxor %xmm13,%xmm5
 movdqu %xmm4,32(%rsi)
 pxor %xmm14,%xmm6
 movq %rbp,%rcx
 movdqu %xmm5,48(%rsi)
 pxor %xmm15,%xmm7
 movl %r10d,%eax
 movdqu %xmm6,64(%rsi)
 leaq 80(%rsi),%rsi
 subq $0x60,%rdx
 ja .Lcbc_dec_loop6

# Loop finished: %xmm2 = pending sixth plaintext block; undo the length bias.
 movdqa %xmm7,%xmm2
 addq $0x50,%rdx
 jle .Lcbc_dec_clear_tail_collected
 movups %xmm7,(%rsi)
 leaq 16(%rsi),%rsi

# Tail: up to five blocks. Blocks are loaded one at a time and each subq/jbe
# pair dispatches as soon as the byte count is used up.
.Lcbc_dec_tail:
 movups (%rdi),%xmm2
 subq $0x10,%rdx
 jbe .Lcbc_dec_one

 movups 16(%rdi),%xmm3
 movaps %xmm2,%xmm11
 subq $0x10,%rdx
 jbe .Lcbc_dec_two

 movups 32(%rdi),%xmm4
 movaps %xmm3,%xmm12
 subq $0x10,%rdx
 jbe .Lcbc_dec_three

 movups 48(%rdi),%xmm5
 movaps %xmm4,%xmm13
 subq $0x10,%rdx
 jbe .Lcbc_dec_four

# Five blocks: use the six-block helper with a zero sixth block. %xmm15 keeps
# ciphertext block 5 as the new chaining value.
 movups 64(%rdi),%xmm6
 movaps %xmm5,%xmm14
 movaps %xmm6,%xmm15
 xorps %xmm7,%xmm7
 call _aesni_decrypt6
 pxor %xmm10,%xmm2
 movaps %xmm15,%xmm10
 pxor %xmm11,%xmm3
 movdqu %xmm2,(%rsi)
 pxor %xmm12,%xmm4
 movdqu %xmm3,16(%rsi)
 pxor %xmm3,%xmm3
 pxor %xmm13,%xmm5
 movdqu %xmm4,32(%rsi)
 pxor %xmm4,%xmm4
 pxor %xmm14,%xmm6
 movdqu %xmm5,48(%rsi)
 pxor %xmm5,%xmm5
 leaq 64(%rsi),%rsi
 movdqa %xmm6,%xmm2
 pxor %xmm6,%xmm6
 pxor %xmm7,%xmm7
 subq $0x10,%rdx
 jmp .Lcbc_dec_tail_collected

.align 16
.Lcbc_dec_one:
# One block, decrypted inline. %xmm11 keeps the ciphertext as the new
# chaining value.
 movaps %xmm2,%xmm11
 movups (%rcx),%xmm0
 movups 16(%rcx),%xmm1
 leaq 32(%rcx),%rcx
 xorps %xmm0,%xmm2
.Loop_dec1_8:
.byte 102,15,56,222,209
 decl %eax
 movups (%rcx),%xmm1
 leaq 16(%rcx),%rcx
 jnz .Loop_dec1_8
.byte 102,15,56,223,209
 xorps %xmm10,%xmm2
 movaps %xmm11,%xmm10
 jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_two:
# Two blocks. The last plaintext block is left in %xmm2 for the common tail.
 movaps %xmm3,%xmm12
 call _aesni_decrypt2
 pxor %xmm10,%xmm2
 movaps %xmm12,%xmm10
 pxor %xmm11,%xmm3
 movdqu %xmm2,(%rsi)
 movdqa %xmm3,%xmm2
 pxor %xmm3,%xmm3
 leaq 16(%rsi),%rsi
 jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_three:
# Three blocks.
 movaps %xmm4,%xmm13
 call _aesni_decrypt3
 pxor %xmm10,%xmm2
 movaps %xmm13,%xmm10
 pxor %xmm11,%xmm3
 movdqu %xmm2,(%rsi)
 pxor %xmm12,%xmm4
 movdqu %xmm3,16(%rsi)
 pxor %xmm3,%xmm3
 movdqa %xmm4,%xmm2
 pxor %xmm4,%xmm4
 leaq 32(%rsi),%rsi
 jmp .Lcbc_dec_tail_collected
.align 16
.Lcbc_dec_four:
# Four blocks.
 movaps %xmm5,%xmm14
 call _aesni_decrypt4
 pxor %xmm10,%xmm2
 movaps %xmm14,%xmm10
 pxor %xmm11,%xmm3
 movdqu %xmm2,(%rsi)
 pxor %xmm12,%xmm4
 movdqu %xmm3,16(%rsi)
 pxor %xmm3,%xmm3
 pxor %xmm13,%xmm5
 movdqu %xmm4,32(%rsi)
 pxor %xmm4,%xmm4
 movdqa %xmm5,%xmm2
 pxor %xmm5,%xmm5
 leaq 48(%rsi),%rsi
 jmp .Lcbc_dec_tail_collected

.align 16
.Lcbc_dec_clear_tail_collected:
# Entry used after the loops: scrub the working registers first.
 pxor %xmm3,%xmm3
 pxor %xmm4,%xmm4
 pxor %xmm5,%xmm5
 pxor %xmm6,%xmm6
 pxor %xmm7,%xmm7
 pxor %xmm8,%xmm8
 pxor %xmm9,%xmm9
.Lcbc_dec_tail_collected:
# Common exit: %xmm10 = new chaining value, %xmm2 = last plaintext block.
# A length that is a multiple of 16 stores the whole block.
 movups %xmm10,(%r8)
 andq $15,%rdx
 jnz .Lcbc_dec_tail_partial
 movups %xmm2,(%rsi)
 pxor %xmm2,%xmm2
 jmp .Lcbc_dec_ret
.align 16
.Lcbc_dec_tail_partial:
# Length not a multiple of 16: spill the block to the aligned stack slot,
# copy 16 - (%rdx & 15) bytes of it to the output with rep movsb, then
# overwrite the slot with the zeroed %xmm2.
# NOTE(review): callers presumably always pass a multiple of 16, in which
# case this path is never taken; confirm the intended byte count if not.
 movaps %xmm2,(%rsp)
 pxor %xmm2,%xmm2
 movq $16,%rcx
 movq %rsi,%rdi
 subq %rdx,%rcx
 leaq (%rsp),%rsi
.long 0x9066A4F3
 movdqa %xmm2,(%rsp)

.Lcbc_dec_ret:
# Scrub the key registers, restore %rbp and the original stack pointer.
 xorps %xmm0,%xmm0
 pxor %xmm1,%xmm1
 movq -8(%r11),%rbp
.cfi_restore %rbp
 leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.Lcbc_ret:
 .byte 0xf3,0xc3
.cfi_endproc
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
# aes_hw_set_decrypt_key: builds an AES-NI decryption key schedule.
#
# The arguments in %rdi, %esi and %rdx are passed through unchanged to
# __aesni_set_encrypt_key (user key, key size in bits, key schedule), and
# its status in %eax is returned unchanged: zero on success, nonzero on
# failure. On success the encryption schedule is converted in place:
# the round keys are put in reverse order and every key except the first and
# the last is transformed with aesimc.
#
# This relies on __aesni_set_encrypt_key leaving the stored round count
# (9, 11 or 13) in %esi and not modifying %rdx.
#
# Pre-assembled instructions:
#   .byte 0x48,0x83,0xEC,0x08  subq $8,%rsp
#   .byte 102,15,56,219,192    aesimc %xmm0,%xmm0
#   .byte 102,15,56,219,201    aesimc %xmm1,%xmm1
#   .byte 0xf3,0xc3            rep ret
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,@function
.align 16
aes_hw_set_decrypt_key:
.cfi_startproc
.byte 0x48,0x83,0xEC,0x08
.cfi_adjust_cfa_offset 8
 call __aesni_set_encrypt_key
# Scale the round count to a byte offset first; the testl that follows sets
# the flags used by jnz.
 shll $4,%esi
 testl %eax,%eax
 jnz .Ldec_key_ret
# %rdx = first round key, %rdi = last round key.
 leaq 16(%rdx,%rsi,1),%rdi

# Swap the first and last round keys as they are.
 movups (%rdx),%xmm0
 movups (%rdi),%xmm1
 movups %xmm0,(%rdi)
 movups %xmm1,(%rdx)
 leaq 16(%rdx),%rdx
 leaq -16(%rdi),%rdi

# Walk inward from both ends, swapping each pair of round keys after applying
# aesimc to both. The stores use the already advanced pointers, hence the
# +16 and -16 displacements.
.Ldec_key_inverse:
 movups (%rdx),%xmm0
 movups (%rdi),%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
 leaq 16(%rdx),%rdx
 leaq -16(%rdi),%rdi
 movups %xmm0,16(%rdi)
 movups %xmm1,-16(%rdx)
 cmpq %rdx,%rdi
 ja .Ldec_key_inverse

# The pointers have met on the middle round key: transform it in place, then
# scrub the registers that held key material.
 movups (%rdx),%xmm0
.byte 102,15,56,219,192
 pxor %xmm1,%xmm1
 movups %xmm0,(%rdi)
 pxor %xmm0,%xmm0
.Ldec_key_ret:
 addq $8,%rsp
.cfi_adjust_cfa_offset -8
 .byte 0xf3,0xc3
.cfi_endproc
.LSEH_end_set_decrypt_key:
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
# aes_hw_set_encrypt_key: expands a user key into an AES-NI encryption key
# schedule. __aesni_set_encrypt_key is a second label on the same entry
# point; aes_hw_set_decrypt_key calls it.
#
# Inputs, as consumed by the code below:
#   %rdi  pointer to the user key bytes
#   %esi  key size in bits: 128, 192 or 256
#   %rdx  pointer to the key schedule to fill in
# Result in %rax:
#   Zero on success, -1 when %rdi or %rdx is NULL, -2 for any other key size.
# On success %esi holds 9, 11 or 13 (for 128, 192 or 256 bits) and the same
# value is stored at byte offset 240 of the key schedule on every path.
# %rdx is not modified; %rax is the running output pointer during expansion.
# %xmm0-%xmm5 are zeroed before returning.
#
# Pre-assembled instructions:
#   .byte 0x48,0x83,0xEC,0x08    subq $8,%rsp
#   .byte 102,15,58,223,200,N    aeskeygenassist $N,%xmm0,%xmm1
#   .byte 102,15,58,223,202,N    aeskeygenassist $N,%xmm2,%xmm1
#   .byte 102,15,56,0,197        pshufb %xmm5,%xmm0
#   .byte 102,15,56,0,213        pshufb %xmm5,%xmm2
#   .byte 102,15,56,221,196      aesenclast %xmm4,%xmm0
#   .byte 102,15,56,221,212      aesenclast %xmm4,%xmm2
#   .byte 102,15,56,221,211      aesenclast %xmm3,%xmm2
#   .byte 0xf3,0xc3              rep ret
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,@function
.align 16
aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:
.cfi_startproc
# Debug-only instrumentation: set byte 3 of BORINGSSL_function_hit.
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
 movb $1,BORINGSSL_function_hit+3(%rip)
#endif
#endif
.byte 0x48,0x83,0xEC,0x08
.cfi_adjust_cfa_offset 8
# Preload the NULL-argument status and reject NULL pointers.
 movq $-1,%rax
 testq %rdi,%rdi
 jz .Lenc_key_ret
 testq %rdx,%rdx
 jz .Lenc_key_ret

# %xmm0 = first 16 key bytes. %xmm4 is zeroed because the expansion helpers
# rely on its low word being zero.
 movups (%rdi),%xmm0
 xorps %xmm4,%xmm4
# %r10d = second word of OPENSSL_ia32cap_P masked with 268437504, which keeps
# bits 11 and 28. Each key size below takes its _alt path when the result is
# 268435456, that is bit 28 set and bit 11 clear.
# NOTE(review): presumably bit 28 is the CPUID.1:ECX AVX bit and bit 11 a
# vendor marker in this capability vector - confirm against the
# OPENSSL_ia32cap_P layout.
 leaq OPENSSL_ia32cap_P(%rip),%r10
 movl 4(%r10),%r10d
 andl $268437504,%r10d
# %rax = address of round key 1.
 leaq 16(%rdx),%rax
 cmpl $256,%esi
 je .L14rounds
 cmpl $192,%esi
 je .L12rounds
 cmpl $128,%esi
 jne .Lbad_keybits

# 128-bit key: stored round count 9.
.L10rounds:
 movl $9,%esi
 cmpl $268435456,%r10d
 je .L10rounds_alt

# Standard path: one aeskeygenassist per round key (immediates 1, 2, 4, ...,
# 128, 27, 54) followed by a helper call. Every call except the first stores
# the previous round key before deriving the next one.
 movups %xmm0,(%rdx)
.byte 102,15,58,223,200,1
 call .Lkey_expansion_128_cold
.byte 102,15,58,223,200,2
 call .Lkey_expansion_128
.byte 102,15,58,223,200,4
 call .Lkey_expansion_128
.byte 102,15,58,223,200,8
 call .Lkey_expansion_128
.byte 102,15,58,223,200,16
 call .Lkey_expansion_128
.byte 102,15,58,223,200,32
 call .Lkey_expansion_128
.byte 102,15,58,223,200,64
 call .Lkey_expansion_128
.byte 102,15,58,223,200,128
 call .Lkey_expansion_128
.byte 102,15,58,223,200,27
 call .Lkey_expansion_128
.byte 102,15,58,223,200,54
 call .Lkey_expansion_128
# Store the last round key; %rax is at offset 160, so 80(%rax) is offset 240.
 movups %xmm0,(%rax)
 movl %esi,80(%rax)
 xorl %eax,%eax
 jmp .Lenc_key_ret

.align 16
.L10rounds_alt:
# Alternative path without aeskeygenassist. %xmm5 = byte shuffle mask,
# %xmm4 = round constant (1 in every word), %xmm2 = previous round key.
 movdqa .Lkey_rotate(%rip),%xmm5
 movl $8,%r10d
 movdqa .Lkey_rcon1(%rip),%xmm4
 movdqa %xmm0,%xmm2
 movdqu %xmm0,(%rdx)
 jmp .Loop_key128

.align 16
.Loop_key128:
# pshufb broadcasts the byte-rotated last word of the previous key into all
# four words of %xmm0; aesenclast with the round constant as its key operand
# then transforms it. pslld doubles the round constant for the next round.
.byte 102,15,56,0,197
.byte 102,15,56,221,196
 pslld $1,%xmm4
 leaq 16(%rax),%rax

# Turn the words of the previous key into their running xor
# (word i becomes w0 ^ ... ^ wi) using three 4-byte left shifts.
 movdqa %xmm2,%xmm3
 pslldq $4,%xmm2
 pxor %xmm2,%xmm3
 pslldq $4,%xmm2
 pxor %xmm2,%xmm3
 pslldq $4,%xmm2
 pxor %xmm3,%xmm2

# New round key = transformed word ^ running xor; store it and keep a copy.
 pxor %xmm2,%xmm0
 movdqu %xmm0,-16(%rax)
 movdqa %xmm0,%xmm2

 decl %r10d
 jnz .Loop_key128

# The last two round keys use round constants 0x1b and, after one more
# pslld, 0x36.
 movdqa .Lkey_rcon1b(%rip),%xmm4

.byte 102,15,56,0,197
.byte 102,15,56,221,196
 pslld $1,%xmm4

 movdqa %xmm2,%xmm3
 pslldq $4,%xmm2
 pxor %xmm2,%xmm3
 pslldq $4,%xmm2
 pxor %xmm2,%xmm3
 pslldq $4,%xmm2
 pxor %xmm3,%xmm2

 pxor %xmm2,%xmm0
 movdqu %xmm0,(%rax)

 movdqa %xmm0,%xmm2
.byte 102,15,56,0,197
.byte 102,15,56,221,196

 movdqa %xmm2,%xmm3
 pslldq $4,%xmm2
 pxor %xmm2,%xmm3
 pslldq $4,%xmm2
 pxor %xmm2,%xmm3
 pslldq $4,%xmm2
 pxor %xmm3,%xmm2

 pxor %xmm2,%xmm0
 movdqu %xmm0,16(%rax)

# %rax is at offset 144 here, so 96(%rax) is offset 240.
 movl %esi,96(%rax)
 xorl %eax,%eax
 jmp .Lenc_key_ret

.align 16
.L12rounds:
# 192-bit key: %xmm2 = key bytes 16-23 (low half only), stored round count 11.
 movq 16(%rdi),%xmm2
 movl $11,%esi
 cmpl $268435456,%r10d
 je .L12rounds_alt

# Standard path: aeskeygenassist on %xmm2 alternating between the 192a and
# 192b helpers, which together emit three round keys per pair of calls.
 movups %xmm0,(%rdx)
.byte 102,15,58,223,202,1
 call .Lkey_expansion_192a_cold
.byte 102,15,58,223,202,2
 call .Lkey_expansion_192b
.byte 102,15,58,223,202,4
 call .Lkey_expansion_192a
.byte 102,15,58,223,202,8
 call .Lkey_expansion_192b
.byte 102,15,58,223,202,16
 call .Lkey_expansion_192a
.byte 102,15,58,223,202,32
 call .Lkey_expansion_192b
.byte 102,15,58,223,202,64
 call .Lkey_expansion_192a
.byte 102,15,58,223,202,128
 call .Lkey_expansion_192b
# %rax is at offset 192 here, so 48(%rax) is offset 240.
 movups %xmm0,(%rax)
 movl %esi,48(%rax)
 xorq %rax,%rax
 jmp .Lenc_key_ret

.align 16
.L12rounds_alt:
# Alternative path: eight iterations, each emitting 24 bytes of schedule.
 movdqa .Lkey_rotate192(%rip),%xmm5
 movdqa .Lkey_rcon1(%rip),%xmm4
 movl $8,%r10d
 movdqu %xmm0,(%rdx)
 jmp .Loop_key192

.align 16
.Loop_key192:
# Store the low 8 bytes of %xmm2, keep a copy in %xmm1, then transform the
# byte-rotated second word with pshufb and aesenclast as in the 128-bit loop.
 movq %xmm2,0(%rax)
 movdqa %xmm2,%xmm1
.byte 102,15,56,0,213
.byte 102,15,56,221,212
 pslld $1,%xmm4
 leaq 24(%rax),%rax

# Running xor of the four words of %xmm0.
 movdqa %xmm0,%xmm3
 pslldq $4,%xmm0
 pxor %xmm0,%xmm3
 pslldq $4,%xmm0
 pxor %xmm0,%xmm3
 pslldq $4,%xmm0
 pxor %xmm3,%xmm0

# Extend the running xor into the two extra words taken from %xmm1.
 pshufd $0xff,%xmm0,%xmm3
 pxor %xmm1,%xmm3
 pslldq $4,%xmm1
 pxor %xmm1,%xmm3

 pxor %xmm2,%xmm0
 pxor %xmm3,%xmm2
 movdqu %xmm0,-16(%rax)

 decl %r10d
 jnz .Loop_key192

# %rax is at offset 208 here, so 32(%rax) is offset 240.
 movl %esi,32(%rax)
 xorl %eax,%eax
 jmp .Lenc_key_ret

.align 16
.L14rounds:
# 256-bit key: %xmm2 = key bytes 16-31, stored round count 13, and %rax
# advanced to round key 2 because the first two keys are the user key itself.
 movups 16(%rdi),%xmm2
 movl $13,%esi
 leaq 16(%rax),%rax
 cmpl $268435456,%r10d
 je .L14rounds_alt

# Standard path: aeskeygenassist alternates between %xmm2 (for the 256a
# helper, which updates %xmm0) and %xmm0 (for 256b, which updates %xmm2).
 movups %xmm0,(%rdx)
 movups %xmm2,16(%rdx)
.byte 102,15,58,223,202,1
 call .Lkey_expansion_256a_cold
.byte 102,15,58,223,200,1
 call .Lkey_expansion_256b
.byte 102,15,58,223,202,2
 call .Lkey_expansion_256a
.byte 102,15,58,223,200,2
 call .Lkey_expansion_256b
.byte 102,15,58,223,202,4
 call .Lkey_expansion_256a
.byte 102,15,58,223,200,4
 call .Lkey_expansion_256b
.byte 102,15,58,223,202,8
 call .Lkey_expansion_256a
.byte 102,15,58,223,200,8
 call .Lkey_expansion_256b
.byte 102,15,58,223,202,16
 call .Lkey_expansion_256a
.byte 102,15,58,223,200,16
 call .Lkey_expansion_256b
.byte 102,15,58,223,202,32
 call .Lkey_expansion_256a
.byte 102,15,58,223,200,32
 call .Lkey_expansion_256b
.byte 102,15,58,223,202,64
 call .Lkey_expansion_256a
# %rax is at offset 224 here, so 16(%rax) is offset 240.
 movups %xmm0,(%rax)
 movl %esi,16(%rax)
 xorq %rax,%rax
 jmp .Lenc_key_ret

.align 16
.L14rounds_alt:
# Alternative path: seven iterations; all but the last emit two round keys.
# %xmm1 keeps a copy of the odd-numbered key for the running xor.
 movdqa .Lkey_rotate(%rip),%xmm5
 movdqa .Lkey_rcon1(%rip),%xmm4
 movl $7,%r10d
 movdqu %xmm0,0(%rdx)
 movdqa %xmm2,%xmm1
 movdqu %xmm2,16(%rdx)
 jmp .Loop_key256

.align 16
.Loop_key256:
# Even-numbered key: rotated and transformed last word of %xmm2 with the
# round constant, combined with the running xor of %xmm0.
.byte 102,15,56,0,213
.byte 102,15,56,221,212

 movdqa %xmm0,%xmm3
 pslldq $4,%xmm0
 pxor %xmm0,%xmm3
 pslldq $4,%xmm0
 pxor %xmm0,%xmm3
 pslldq $4,%xmm0
 pxor %xmm3,%xmm0
 pslld $1,%xmm4

 pxor %xmm2,%xmm0
 movdqu %xmm0,(%rax)

 decl %r10d
 jz .Ldone_key256

# Odd-numbered key: broadcast the last word of the new key and transform it
# with aesenclast using an all-zero key operand (no rotation, no round
# constant), then combine with the running xor of %xmm1.
 pshufd $0xff,%xmm0,%xmm2
 pxor %xmm3,%xmm3
.byte 102,15,56,221,211

 movdqa %xmm1,%xmm3
 pslldq $4,%xmm1
 pxor %xmm1,%xmm3
 pslldq $4,%xmm1
 pxor %xmm1,%xmm3
 pslldq $4,%xmm1
 pxor %xmm3,%xmm1

 pxor %xmm1,%xmm2
 movdqu %xmm2,16(%rax)
 leaq 32(%rax),%rax
 movdqa %xmm2,%xmm1

 jmp .Loop_key256

.Ldone_key256:
# %rax is at offset 224 here, so 16(%rax) is offset 240.
 movl %esi,16(%rax)
 xorl %eax,%eax
 jmp .Lenc_key_ret

.align 16
.Lbad_keybits:
 movq $-2,%rax
.Lenc_key_ret:
# Common exit: scrub every vector register used above and release the
# 8 bytes of stack taken at entry.
 pxor %xmm0,%xmm0
 pxor %xmm1,%xmm1
 pxor %xmm2,%xmm2
 pxor %xmm3,%xmm3
 pxor %xmm4,%xmm4
 pxor %xmm5,%xmm5
 addq $8,%rsp
.cfi_adjust_cfa_offset -8
 .byte 0xf3,0xc3
.cfi_endproc
.LSEH_end_set_encrypt_key:

# Local helpers reached with call from the standard paths above. They take
# the aeskeygenassist result in %xmm1, use %rax as the output pointer and
# return with rep ret. They rely on the low word of %xmm4 being zero, which
# the xorps at function entry establishes and the shufps sequences preserve.

.align 16
.Lkey_expansion_128:
# Store the previous round key, then fall into the derivation.
 movups %xmm0,(%rax)
 leaq 16(%rax),%rax
.Lkey_expansion_128_cold:
# The two shufps/xorps pairs turn the words of %xmm0 into their running xor;
# the last word of the aeskeygenassist result is then broadcast and xored in.
 shufps $16,%xmm0,%xmm4
 xorps %xmm4,%xmm0
 shufps $140,%xmm0,%xmm4
 xorps %xmm4,%xmm0
 shufps $255,%xmm1,%xmm1
 xorps %xmm1,%xmm0
 .byte 0xf3,0xc3

.align 16
.Lkey_expansion_192a:
# Store a complete round key, then derive the next six words.
 movups %xmm0,(%rax)
 leaq 16(%rax),%rax
.Lkey_expansion_192a_cold:
# Keep the two pending words of %xmm2 in %xmm5 for the next 192b call.
 movaps %xmm2,%xmm5
.Lkey_expansion_192b_warm:
# Running xor over %xmm0 as in the 128-bit helper, combined with word 1 of
# the aeskeygenassist result; %xmm2 is extended the same way from the last
# word of the new %xmm0.
 shufps $16,%xmm0,%xmm4
 movdqa %xmm2,%xmm3
 xorps %xmm4,%xmm0
 shufps $140,%xmm0,%xmm4
 pslldq $4,%xmm3
 xorps %xmm4,%xmm0
 pshufd $85,%xmm1,%xmm1
 pxor %xmm3,%xmm2
 pxor %xmm1,%xmm0
 pshufd $255,%xmm0,%xmm3
 pxor %xmm3,%xmm2
 .byte 0xf3,0xc3

.align 16
.Lkey_expansion_192b:
# Emit two round keys assembled from 64-bit halves: (low half of %xmm5, low
# half of %xmm0) and (high half of %xmm0, low half of %xmm2), then continue
# with the shared derivation.
 movaps %xmm0,%xmm3
 shufps $68,%xmm0,%xmm5
 movups %xmm5,(%rax)
 shufps $78,%xmm2,%xmm3
 movups %xmm3,16(%rax)
 leaq 32(%rax),%rax
 jmp .Lkey_expansion_192b_warm

.align 16
.Lkey_expansion_256a:
# Store the previous odd-numbered key (%xmm2), then derive the next
# even-numbered key in %xmm0.
 movups %xmm2,(%rax)
 leaq 16(%rax),%rax
.Lkey_expansion_256a_cold:
 shufps $16,%xmm0,%xmm4
 xorps %xmm4,%xmm0
 shufps $140,%xmm0,%xmm4
 xorps %xmm4,%xmm0
 shufps $255,%xmm1,%xmm1
 xorps %xmm1,%xmm0
 .byte 0xf3,0xc3

.align 16
.Lkey_expansion_256b:
# Store the previous even-numbered key (%xmm0), then derive the next
# odd-numbered key in %xmm2 using word 2 of the aeskeygenassist result.
 movups %xmm0,(%rax)
 leaq 16(%rax),%rax

 shufps $16,%xmm2,%xmm4
 xorps %xmm4,%xmm2
 shufps $140,%xmm2,%xmm4
 xorps %xmm4,%xmm2
 shufps $170,%xmm1,%xmm1
 xorps %xmm1,%xmm2
 .byte 0xf3,0xc3
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Constant tables, 64-byte aligned. The .Lkey_* entries are loaded by the
# alternative key-expansion paths of aes_hw_set_encrypt_key; the other
# tables are referenced by code outside this part of the file.
.align 64
# Byte indices 15 down to 0.
# NOTE(review): presumably a pshufb mask that reverses a 16-byte vector.
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
# Dwords 6,6,6,0.
# NOTE(review): presumably a packed counter increment - confirm at the users.
.Lincrement32:
.long 6,6,6,0
# Dwords 1,0,0,0, i.e. the value 1 in the low 64 bits.
.Lincrement64:
.long 1,0,0,0
# Dwords 0x87,0,1,0.
# NOTE(review): the name suggests the XTS tweak reduction constant - confirm.
.Lxts_magic:
.long 0x87,0,1,0
# Fifteen zero bytes followed by a single 1.
.Lincrement1:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
# Shuffle mask selecting source bytes 13,14,15,12 for every dword: the last
# word rotated by one byte and broadcast to all four words.
.Lkey_rotate:
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
# Shuffle mask selecting source bytes 5,6,7,4 for every dword: the second
# word rotated by one byte and broadcast to all four words.
.Lkey_rotate192:
.long 0x04070605,0x04070605,0x04070605,0x04070605
# Initial round constant 1 in every dword; the loops double it with pslld.
.Lkey_rcon1:
.long 1,1,1,1
# Round constant 0x1b in every dword, loaded for the last two 128-bit rounds.
.Lkey_rcon1b:
.long 0x1b,0x1b,0x1b,0x1b

# NUL-terminated ASCII identification string: AES for Intel AES-NI,
# CRYPTOGAMS by appro at openssl.org (the address is enclosed in angle
# brackets in the data).
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
2511#endif
2512