1 | # This file is generated from a similarly-named Perl script in the BoringSSL |
2 | # source tree. Do not edit by hand. |
3 | |
4 | #if defined(__has_feature) |
5 | #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) |
6 | #define OPENSSL_NO_ASM |
7 | #endif |
8 | #endif |
9 | |
10 | #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) |
11 | #if defined(BORINGSSL_PREFIX) |
12 | #include <boringssl_prefix_symbols_asm.h> |
13 | #endif |
14 | .text |
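# _aesni_ctr32_ghash_6x is the stitched inner loop: every iteration runs the
# AES rounds for six counter blocks while folding six earlier 16-byte blocks
# into the GHASH accumulator. Register roles, as read from the code rather
# than from any documented contract: %rdi/%rsi are the in/out pointers, %rdx
# the remaining block count, %rcx the key schedule biased by +128, %r8 the
# counter block, %r9 the GHASH state biased by +64 (the H^i table sits at
# fixed offsets around it), %r11 the .Lbswap_mask constant pool, and %ebx a
# byte-swapped copy of the 32-bit counter word.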
15 | |
16 | .type _aesni_ctr32_ghash_6x,@function |
17 | .align 32 |
18 | _aesni_ctr32_ghash_6x: |
19 | .cfi_startproc |
20 | vmovdqu 32(%r11),%xmm2 |
21 | subq $6,%rdx |
22 | vpxor %xmm4,%xmm4,%xmm4 |
23 | vmovdqu 0-128(%rcx),%xmm15 |
24 | vpaddb %xmm2,%xmm1,%xmm10 |
25 | vpaddb %xmm2,%xmm10,%xmm11 |
26 | vpaddb %xmm2,%xmm11,%xmm12 |
27 | vpaddb %xmm2,%xmm12,%xmm13 |
28 | vpaddb %xmm2,%xmm13,%xmm14 |
29 | vpxor %xmm15,%xmm1,%xmm9 |
30 | vmovdqu %xmm4,16+8(%rsp) |
31 | jmp .Loop6x |
32 | |
33 | .align 32 |
34 | .Loop6x: |
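# Fast-path counter update: the GCM counter sits big-endian in the last four
# bytes of the block, so adding .Lone_msb with vpaddb bumps it by one as
# long as its low byte does not wrap. %ebx caches that word byte-swapped;
# adding 6<<24 (100663296) carries out exactly when one of the next six
# increments would wrap that byte, and only then is the .Lhandle_ctr32 slow
# path taken.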
35 | addl $100663296,%ebx |
36 | jc .Lhandle_ctr32 |
37 | vmovdqu 0-32(%r9),%xmm3 |
38 | vpaddb %xmm2,%xmm14,%xmm1 |
39 | vpxor %xmm15,%xmm10,%xmm10 |
40 | vpxor %xmm15,%xmm11,%xmm11 |
41 | |
42 | .Lresume_ctr32: |
43 | vmovdqu %xmm1,(%r8) |
44 | vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 |
45 | vpxor %xmm15,%xmm12,%xmm12 |
46 | vmovups 16-128(%rcx),%xmm2 |
47 | vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 |
48 | |
49 | |
50 | |
51 | |
52 | |
53 | |
54 | |
55 | |
56 | |
57 | |
58 | |
59 | |
60 | |
61 | |
62 | |
63 | |
64 | |
65 | xorq %r12,%r12 |
66 | cmpq %r14,%r15 |
67 | |
68 | vaesenc %xmm2,%xmm9,%xmm9 |
69 | vmovdqu 48+8(%rsp),%xmm0 |
70 | vpxor %xmm15,%xmm13,%xmm13 |
71 | vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 |
72 | vaesenc %xmm2,%xmm10,%xmm10 |
73 | vpxor %xmm15,%xmm14,%xmm14 |
74 | setnc %r12b |
75 | vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 |
76 | vaesenc %xmm2,%xmm11,%xmm11 |
77 | vmovdqu 16-32(%r9),%xmm3 |
78 | negq %r12 |
79 | vaesenc %xmm2,%xmm12,%xmm12 |
80 | vpxor %xmm5,%xmm6,%xmm6 |
81 | vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 |
82 | vpxor %xmm4,%xmm8,%xmm8 |
83 | vaesenc %xmm2,%xmm13,%xmm13 |
84 | vpxor %xmm5,%xmm1,%xmm4 |
85 | andq $0x60,%r12 |
86 | vmovups 32-128(%rcx),%xmm15 |
87 | vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 |
88 | vaesenc %xmm2,%xmm14,%xmm14 |
89 | |
90 | vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 |
91 | leaq (%r14,%r12,1),%r14 |
92 | vaesenc %xmm15,%xmm9,%xmm9 |
93 | vpxor 16+8(%rsp),%xmm8,%xmm8 |
94 | vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 |
95 | vmovdqu 64+8(%rsp),%xmm0 |
96 | vaesenc %xmm15,%xmm10,%xmm10 |
97 | movbeq 88(%r14),%r13 |
98 | vaesenc %xmm15,%xmm11,%xmm11 |
99 | movbeq 80(%r14),%r12 |
100 | vaesenc %xmm15,%xmm12,%xmm12 |
101 | movq %r13,32+8(%rsp) |
102 | vaesenc %xmm15,%xmm13,%xmm13 |
103 | movq %r12,40+8(%rsp) |
104 | vmovdqu 48-32(%r9),%xmm5 |
105 | vaesenc %xmm15,%xmm14,%xmm14 |
106 | |
107 | vmovups 48-128(%rcx),%xmm15 |
108 | vpxor %xmm1,%xmm6,%xmm6 |
109 | vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 |
110 | vaesenc %xmm15,%xmm9,%xmm9 |
111 | vpxor %xmm2,%xmm6,%xmm6 |
112 | vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 |
113 | vaesenc %xmm15,%xmm10,%xmm10 |
114 | vpxor %xmm3,%xmm7,%xmm7 |
115 | vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 |
116 | vaesenc %xmm15,%xmm11,%xmm11 |
117 | vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 |
118 | vmovdqu 80+8(%rsp),%xmm0 |
119 | vaesenc %xmm15,%xmm12,%xmm12 |
120 | vaesenc %xmm15,%xmm13,%xmm13 |
121 | vpxor %xmm1,%xmm4,%xmm4 |
122 | vmovdqu 64-32(%r9),%xmm1 |
123 | vaesenc %xmm15,%xmm14,%xmm14 |
124 | |
125 | vmovups 64-128(%rcx),%xmm15 |
126 | vpxor %xmm2,%xmm6,%xmm6 |
127 | vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 |
128 | vaesenc %xmm15,%xmm9,%xmm9 |
129 | vpxor %xmm3,%xmm6,%xmm6 |
130 | vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 |
131 | vaesenc %xmm15,%xmm10,%xmm10 |
132 | movbeq 72(%r14),%r13 |
133 | vpxor %xmm5,%xmm7,%xmm7 |
134 | vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 |
135 | vaesenc %xmm15,%xmm11,%xmm11 |
136 | movbeq 64(%r14),%r12 |
137 | vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 |
138 | vmovdqu 96+8(%rsp),%xmm0 |
139 | vaesenc %xmm15,%xmm12,%xmm12 |
140 | movq %r13,48+8(%rsp) |
141 | vaesenc %xmm15,%xmm13,%xmm13 |
142 | movq %r12,56+8(%rsp) |
143 | vpxor %xmm2,%xmm4,%xmm4 |
144 | vmovdqu 96-32(%r9),%xmm2 |
145 | vaesenc %xmm15,%xmm14,%xmm14 |
146 | |
147 | vmovups 80-128(%rcx),%xmm15 |
148 | vpxor %xmm3,%xmm6,%xmm6 |
149 | vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 |
150 | vaesenc %xmm15,%xmm9,%xmm9 |
151 | vpxor %xmm5,%xmm6,%xmm6 |
152 | vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 |
153 | vaesenc %xmm15,%xmm10,%xmm10 |
154 | movbeq 56(%r14),%r13 |
155 | vpxor %xmm1,%xmm7,%xmm7 |
156 | vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 |
157 | vpxor 112+8(%rsp),%xmm8,%xmm8 |
158 | vaesenc %xmm15,%xmm11,%xmm11 |
159 | movbeq 48(%r14),%r12 |
160 | vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 |
161 | vaesenc %xmm15,%xmm12,%xmm12 |
162 | movq %r13,64+8(%rsp) |
163 | vaesenc %xmm15,%xmm13,%xmm13 |
164 | movq %r12,72+8(%rsp) |
165 | vpxor %xmm3,%xmm4,%xmm4 |
166 | vmovdqu 112-32(%r9),%xmm3 |
167 | vaesenc %xmm15,%xmm14,%xmm14 |
168 | |
169 | vmovups 96-128(%rcx),%xmm15 |
170 | vpxor %xmm5,%xmm6,%xmm6 |
171 | vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 |
172 | vaesenc %xmm15,%xmm9,%xmm9 |
173 | vpxor %xmm1,%xmm6,%xmm6 |
174 | vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 |
175 | vaesenc %xmm15,%xmm10,%xmm10 |
176 | movbeq 40(%r14),%r13 |
177 | vpxor %xmm2,%xmm7,%xmm7 |
178 | vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 |
179 | vaesenc %xmm15,%xmm11,%xmm11 |
180 | movbeq 32(%r14),%r12 |
181 | vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 |
182 | vaesenc %xmm15,%xmm12,%xmm12 |
183 | movq %r13,80+8(%rsp) |
184 | vaesenc %xmm15,%xmm13,%xmm13 |
185 | movq %r12,88+8(%rsp) |
186 | vpxor %xmm5,%xmm6,%xmm6 |
187 | vaesenc %xmm15,%xmm14,%xmm14 |
188 | vpxor %xmm1,%xmm6,%xmm6 |
189 | |
190 | vmovups 112-128(%rcx),%xmm15 |
191 | vpslldq $8,%xmm6,%xmm5 |
192 | vpxor %xmm2,%xmm4,%xmm4 |
193 | vmovdqu 16(%r11),%xmm3 |
194 | |
195 | vaesenc %xmm15,%xmm9,%xmm9 |
196 | vpxor %xmm8,%xmm7,%xmm7 |
197 | vaesenc %xmm15,%xmm10,%xmm10 |
198 | vpxor %xmm5,%xmm4,%xmm4 |
199 | movbeq 24(%r14),%r13 |
200 | vaesenc %xmm15,%xmm11,%xmm11 |
201 | movbeq 16(%r14),%r12 |
202 | vpalignr $8,%xmm4,%xmm4,%xmm0 |
203 | vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 |
204 | movq %r13,96+8(%rsp) |
205 | vaesenc %xmm15,%xmm12,%xmm12 |
206 | movq %r12,104+8(%rsp) |
207 | vaesenc %xmm15,%xmm13,%xmm13 |
208 | vmovups 128-128(%rcx),%xmm1 |
209 | vaesenc %xmm15,%xmm14,%xmm14 |
210 | |
211 | vaesenc %xmm1,%xmm9,%xmm9 |
212 | vmovups 144-128(%rcx),%xmm15 |
213 | vaesenc %xmm1,%xmm10,%xmm10 |
214 | vpsrldq $8,%xmm6,%xmm6 |
215 | vaesenc %xmm1,%xmm11,%xmm11 |
216 | vpxor %xmm6,%xmm7,%xmm7 |
217 | vaesenc %xmm1,%xmm12,%xmm12 |
218 | vpxor %xmm0,%xmm4,%xmm4 |
219 | movbeq 8(%r14),%r13 |
220 | vaesenc %xmm1,%xmm13,%xmm13 |
221 | movbeq 0(%r14),%r12 |
222 | vaesenc %xmm1,%xmm14,%xmm14 |
223 | vmovups 160-128(%rcx),%xmm1 |
224 | cmpl $11,%ebp |
225 | jb .Lenc_tail |
226 | |
227 | vaesenc %xmm15,%xmm9,%xmm9 |
228 | vaesenc %xmm15,%xmm10,%xmm10 |
229 | vaesenc %xmm15,%xmm11,%xmm11 |
230 | vaesenc %xmm15,%xmm12,%xmm12 |
231 | vaesenc %xmm15,%xmm13,%xmm13 |
232 | vaesenc %xmm15,%xmm14,%xmm14 |
233 | |
234 | vaesenc %xmm1,%xmm9,%xmm9 |
235 | vaesenc %xmm1,%xmm10,%xmm10 |
236 | vaesenc %xmm1,%xmm11,%xmm11 |
237 | vaesenc %xmm1,%xmm12,%xmm12 |
238 | vaesenc %xmm1,%xmm13,%xmm13 |
239 | vmovups 176-128(%rcx),%xmm15 |
240 | vaesenc %xmm1,%xmm14,%xmm14 |
241 | vmovups 192-128(%rcx),%xmm1 |
242 | je .Lenc_tail |
243 | |
244 | vaesenc %xmm15,%xmm9,%xmm9 |
245 | vaesenc %xmm15,%xmm10,%xmm10 |
246 | vaesenc %xmm15,%xmm11,%xmm11 |
247 | vaesenc %xmm15,%xmm12,%xmm12 |
248 | vaesenc %xmm15,%xmm13,%xmm13 |
249 | vaesenc %xmm15,%xmm14,%xmm14 |
250 | |
251 | vaesenc %xmm1,%xmm9,%xmm9 |
252 | vaesenc %xmm1,%xmm10,%xmm10 |
253 | vaesenc %xmm1,%xmm11,%xmm11 |
254 | vaesenc %xmm1,%xmm12,%xmm12 |
255 | vaesenc %xmm1,%xmm13,%xmm13 |
256 | vmovups 208-128(%rcx),%xmm15 |
257 | vaesenc %xmm1,%xmm14,%xmm14 |
258 | vmovups 224-128(%rcx),%xmm1 |
259 | jmp .Lenc_tail |
260 | |
261 | .align 32 |
262 | .Lhandle_ctr32: |
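# Slow path for a wrapping counter byte: byte-swap the counter to
# little-endian, do proper 32-bit increments with vpaddd using .Lone_lsb and
# .Ltwo_lsb, then shuffle the six blocks back to wire order before rejoining
# the main loop at .Lresume_ctr32.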
263 | vmovdqu (%r11),%xmm0 |
264 | vpshufb %xmm0,%xmm1,%xmm6 |
265 | vmovdqu 48(%r11),%xmm5 |
266 | vpaddd 64(%r11),%xmm6,%xmm10 |
267 | vpaddd %xmm5,%xmm6,%xmm11 |
268 | vmovdqu 0-32(%r9),%xmm3 |
269 | vpaddd %xmm5,%xmm10,%xmm12 |
270 | vpshufb %xmm0,%xmm10,%xmm10 |
271 | vpaddd %xmm5,%xmm11,%xmm13 |
272 | vpshufb %xmm0,%xmm11,%xmm11 |
273 | vpxor %xmm15,%xmm10,%xmm10 |
274 | vpaddd %xmm5,%xmm12,%xmm14 |
275 | vpshufb %xmm0,%xmm12,%xmm12 |
276 | vpxor %xmm15,%xmm11,%xmm11 |
277 | vpaddd %xmm5,%xmm13,%xmm1 |
278 | vpshufb %xmm0,%xmm13,%xmm13 |
279 | vpshufb %xmm0,%xmm14,%xmm14 |
280 | vpshufb %xmm0,%xmm1,%xmm1 |
281 | jmp .Lresume_ctr32 |
282 | |
283 | .align 32 |
284 | .Lenc_tail: |
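# Finish one iteration: run the remaining AES rounds, let vaesenclast fold
# the last round key together with the six input blocks, complete the
# pending GHASH reduction step, then advance the data pointers and either
# go around .Loop6x with fresh counters or branch to .L6x_done.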
285 | vaesenc %xmm15,%xmm9,%xmm9 |
286 | vmovdqu %xmm7,16+8(%rsp) |
287 | vpalignr $8,%xmm4,%xmm4,%xmm8 |
288 | vaesenc %xmm15,%xmm10,%xmm10 |
289 | vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 |
290 | vpxor 0(%rdi),%xmm1,%xmm2 |
291 | vaesenc %xmm15,%xmm11,%xmm11 |
292 | vpxor 16(%rdi),%xmm1,%xmm0 |
293 | vaesenc %xmm15,%xmm12,%xmm12 |
294 | vpxor 32(%rdi),%xmm1,%xmm5 |
295 | vaesenc %xmm15,%xmm13,%xmm13 |
296 | vpxor 48(%rdi),%xmm1,%xmm6 |
297 | vaesenc %xmm15,%xmm14,%xmm14 |
298 | vpxor 64(%rdi),%xmm1,%xmm7 |
299 | vpxor 80(%rdi),%xmm1,%xmm3 |
300 | vmovdqu (%r8),%xmm1 |
301 | |
302 | vaesenclast %xmm2,%xmm9,%xmm9 |
303 | vmovdqu 32(%r11),%xmm2 |
304 | vaesenclast %xmm0,%xmm10,%xmm10 |
305 | vpaddb %xmm2,%xmm1,%xmm0 |
306 | movq %r13,112+8(%rsp) |
307 | leaq 96(%rdi),%rdi |
308 | vaesenclast %xmm5,%xmm11,%xmm11 |
309 | vpaddb %xmm2,%xmm0,%xmm5 |
310 | movq %r12,120+8(%rsp) |
311 | leaq 96(%rsi),%rsi |
312 | vmovdqu 0-128(%rcx),%xmm15 |
313 | vaesenclast %xmm6,%xmm12,%xmm12 |
314 | vpaddb %xmm2,%xmm5,%xmm6 |
315 | vaesenclast %xmm7,%xmm13,%xmm13 |
316 | vpaddb %xmm2,%xmm6,%xmm7 |
317 | vaesenclast %xmm3,%xmm14,%xmm14 |
318 | vpaddb %xmm2,%xmm7,%xmm3 |
319 | |
320 | addq $0x60,%r10 |
321 | subq $0x6,%rdx |
322 | jc .L6x_done |
323 | |
324 | vmovups %xmm9,-96(%rsi) |
325 | vpxor %xmm15,%xmm1,%xmm9 |
326 | vmovups %xmm10,-80(%rsi) |
327 | vmovdqa %xmm0,%xmm10 |
328 | vmovups %xmm11,-64(%rsi) |
329 | vmovdqa %xmm5,%xmm11 |
330 | vmovups %xmm12,-48(%rsi) |
331 | vmovdqa %xmm6,%xmm12 |
332 | vmovups %xmm13,-32(%rsi) |
333 | vmovdqa %xmm7,%xmm13 |
334 | vmovups %xmm14,-16(%rsi) |
335 | vmovdqa %xmm3,%xmm14 |
336 | vmovdqu 32+8(%rsp),%xmm7 |
337 | jmp .Loop6x |
338 | |
339 | .L6x_done: |
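# Fold the partial products still parked on the stack into the accumulator
# so that %xmm8 carries the updated GHASH value back to the caller.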
340 | vpxor 16+8(%rsp),%xmm8,%xmm8 |
341 | vpxor %xmm4,%xmm8,%xmm8 |
342 | |
343 | .byte 0xf3,0xc3 |
344 | .cfi_endproc |
345 | .size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x |
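# aesni_gcm_decrypt: bulk CTR decryption stitched with GHASH over the
# ciphertext. Assuming the usual prototype (in, out, len, key, ivec, Xi)
# and the SysV calling convention, the six arguments arrive in %rdi, %rsi,
# %rdx, %rcx, %r8 and %r9; %r9 is taken to point at the hash state Xi with
# the precomputed powers of H laid out at fixed offsets just past it. Only
# whole 96-byte chunks are handled: anything below 0x60 bytes bails out
# early, and the count of bytes actually processed is returned in %rax.
# The C caller is assumed to have verified AES-NI, PCLMULQDQ, AVX and MOVBE
# support before calling in here.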
346 | .globl aesni_gcm_decrypt |
347 | .hidden aesni_gcm_decrypt |
348 | .type aesni_gcm_decrypt,@function |
349 | .align 32 |
350 | aesni_gcm_decrypt: |
351 | .cfi_startproc |
352 | xorq %r10,%r10 |
353 | |
354 | |
355 | |
356 | cmpq $0x60,%rdx |
357 | jb .Lgcm_dec_abort |
358 | |
359 | leaq (%rsp),%rax |
360 | .cfi_def_cfa_register %rax |
361 | pushq %rbx |
362 | .cfi_offset %rbx,-16 |
363 | pushq %rbp |
364 | .cfi_offset %rbp,-24 |
365 | pushq %r12 |
366 | .cfi_offset %r12,-32 |
367 | pushq %r13 |
368 | .cfi_offset %r13,-40 |
369 | pushq %r14 |
370 | .cfi_offset %r14,-48 |
371 | pushq %r15 |
372 | .cfi_offset %r15,-56 |
373 | vzeroupper |
374 | |
375 | vmovdqu (%r8),%xmm1 |
376 | addq $-128,%rsp |
377 | movl 12(%r8),%ebx |
378 | leaq .Lbswap_mask(%rip),%r11 |
379 | leaq -128(%rcx),%r14 |
380 | movq $0xf80,%r15 |
381 | vmovdqu (%r9),%xmm8 |
382 | andq $-128,%rsp |
383 | vmovdqu (%r11),%xmm0 |
384 | leaq 128(%rcx),%rcx |
385 | leaq 32+32(%r9),%r9 |
386 | movl 240-128(%rcx),%ebp |
387 | vpshufb %xmm0,%xmm8,%xmm8 |
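# The mask arithmetic below appears intended to keep the stack scratch area
# off the L1 cache sets occupied by the key schedule: the page offsets of
# the two are compared at 128-byte granularity (mask 0xf80), and when the
# key sits less than 768 bytes below the frame, %rsp is lowered by the
# difference so their cache sets do not collide.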
388 | |
389 | andq %r15,%r14 |
390 | andq %rsp,%r15 |
391 | subq %r14,%r15 |
392 | jc .Ldec_no_key_aliasing |
393 | cmpq $768,%r15 |
394 | jnc .Ldec_no_key_aliasing |
395 | subq %r15,%rsp |
396 | .Ldec_no_key_aliasing: |
397 | |
398 | vmovdqu 80(%rdi),%xmm7 |
399 | leaq (%rdi),%r14 |
400 | vmovdqu 64(%rdi),%xmm4 |
401 | |
402 | |
403 | |
404 | |
405 | |
406 | |
407 | |
408 | leaq -192(%rdi,%rdx,1),%r15 |
409 | |
410 | vmovdqu 48(%rdi),%xmm5 |
411 | shrq $4,%rdx |
412 | xorq %r10,%r10 |
413 | vmovdqu 32(%rdi),%xmm6 |
414 | vpshufb %xmm0,%xmm7,%xmm7 |
415 | vmovdqu 16(%rdi),%xmm2 |
416 | vpshufb %xmm0,%xmm4,%xmm4 |
417 | vmovdqu (%rdi),%xmm3 |
418 | vpshufb %xmm0,%xmm5,%xmm5 |
419 | vmovdqu %xmm4,48(%rsp) |
420 | vpshufb %xmm0,%xmm6,%xmm6 |
421 | vmovdqu %xmm5,64(%rsp) |
422 | vpshufb %xmm0,%xmm2,%xmm2 |
423 | vmovdqu %xmm6,80(%rsp) |
424 | vpshufb %xmm0,%xmm3,%xmm3 |
425 | vmovdqu %xmm2,96(%rsp) |
426 | vmovdqu %xmm3,112(%rsp) |
427 | |
428 | call _aesni_ctr32_ghash_6x |
429 | |
430 | vmovups %xmm9,-96(%rsi) |
431 | vmovups %xmm10,-80(%rsi) |
432 | vmovups %xmm11,-64(%rsi) |
433 | vmovups %xmm12,-48(%rsi) |
434 | vmovups %xmm13,-32(%rsi) |
435 | vmovups %xmm14,-16(%rsi) |
436 | |
437 | vpshufb (%r11),%xmm8,%xmm8 |
438 | vmovdqu %xmm8,-64(%r9) |
439 | |
440 | vzeroupper |
441 | movq -48(%rax),%r15 |
442 | .cfi_restore %r15 |
443 | movq -40(%rax),%r14 |
444 | .cfi_restore %r14 |
445 | movq -32(%rax),%r13 |
446 | .cfi_restore %r13 |
447 | movq -24(%rax),%r12 |
448 | .cfi_restore %r12 |
449 | movq -16(%rax),%rbp |
450 | .cfi_restore %rbp |
451 | movq -8(%rax),%rbx |
452 | .cfi_restore %rbx |
453 | leaq (%rax),%rsp |
454 | .cfi_def_cfa_register %rsp |
455 | .Lgcm_dec_abort: |
456 | movq %r10,%rax |
457 | .byte 0xf3,0xc3 |
458 | .cfi_endproc |
459 | .size aesni_gcm_decrypt,.-aesni_gcm_decrypt |
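# _aesni_ctr32_6x encrypts six counter blocks and XORs the keystream with
# the next 96 bytes of input, writing 96 bytes of output and doing no GHASH
# work; the encrypt path runs it twice so the stitched loop always has
# ciphertext ahead of it to hash. Register assignments match the main loop,
# with %rbp holding the AES round count and %r12/%r13 serving as key pointer
# and round counter.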
460 | .type _aesni_ctr32_6x,@function |
461 | .align 32 |
462 | _aesni_ctr32_6x: |
463 | .cfi_startproc |
464 | vmovdqu 0-128(%rcx),%xmm4 |
465 | vmovdqu 32(%r11),%xmm2 |
466 | leaq -1(%rbp),%r13 |
467 | vmovups 16-128(%rcx),%xmm15 |
468 | leaq 32-128(%rcx),%r12 |
469 | vpxor %xmm4,%xmm1,%xmm9 |
470 | addl $100663296,%ebx |
471 | jc .Lhandle_ctr32_2 |
472 | vpaddb %xmm2,%xmm1,%xmm10 |
473 | vpaddb %xmm2,%xmm10,%xmm11 |
474 | vpxor %xmm4,%xmm10,%xmm10 |
475 | vpaddb %xmm2,%xmm11,%xmm12 |
476 | vpxor %xmm4,%xmm11,%xmm11 |
477 | vpaddb %xmm2,%xmm12,%xmm13 |
478 | vpxor %xmm4,%xmm12,%xmm12 |
479 | vpaddb %xmm2,%xmm13,%xmm14 |
480 | vpxor %xmm4,%xmm13,%xmm13 |
481 | vpaddb %xmm2,%xmm14,%xmm1 |
482 | vpxor %xmm4,%xmm14,%xmm14 |
483 | jmp .Loop_ctr32 |
484 | |
485 | .align 16 |
486 | .Loop_ctr32: |
487 | vaesenc %xmm15,%xmm9,%xmm9 |
488 | vaesenc %xmm15,%xmm10,%xmm10 |
489 | vaesenc %xmm15,%xmm11,%xmm11 |
490 | vaesenc %xmm15,%xmm12,%xmm12 |
491 | vaesenc %xmm15,%xmm13,%xmm13 |
492 | vaesenc %xmm15,%xmm14,%xmm14 |
493 | vmovups (%r12),%xmm15 |
494 | leaq 16(%r12),%r12 |
495 | decl %r13d |
496 | jnz .Loop_ctr32 |
497 | |
498 | vmovdqu (%r12),%xmm3 |
499 | vaesenc %xmm15,%xmm9,%xmm9 |
500 | vpxor 0(%rdi),%xmm3,%xmm4 |
501 | vaesenc %xmm15,%xmm10,%xmm10 |
502 | vpxor 16(%rdi),%xmm3,%xmm5 |
503 | vaesenc %xmm15,%xmm11,%xmm11 |
504 | vpxor 32(%rdi),%xmm3,%xmm6 |
505 | vaesenc %xmm15,%xmm12,%xmm12 |
506 | vpxor 48(%rdi),%xmm3,%xmm8 |
507 | vaesenc %xmm15,%xmm13,%xmm13 |
508 | vpxor 64(%rdi),%xmm3,%xmm2 |
509 | vaesenc %xmm15,%xmm14,%xmm14 |
510 | vpxor 80(%rdi),%xmm3,%xmm3 |
511 | leaq 96(%rdi),%rdi |
512 | |
513 | vaesenclast %xmm4,%xmm9,%xmm9 |
514 | vaesenclast %xmm5,%xmm10,%xmm10 |
515 | vaesenclast %xmm6,%xmm11,%xmm11 |
516 | vaesenclast %xmm8,%xmm12,%xmm12 |
517 | vaesenclast %xmm2,%xmm13,%xmm13 |
518 | vaesenclast %xmm3,%xmm14,%xmm14 |
519 | vmovups %xmm9,0(%rsi) |
520 | vmovups %xmm10,16(%rsi) |
521 | vmovups %xmm11,32(%rsi) |
522 | vmovups %xmm12,48(%rsi) |
523 | vmovups %xmm13,64(%rsi) |
524 | vmovups %xmm14,80(%rsi) |
525 | leaq 96(%rsi),%rsi |
526 | |
527 | .byte 0xf3,0xc3 |
528 | .align 32 |
529 | .Lhandle_ctr32_2: |
530 | vpshufb %xmm0,%xmm1,%xmm6 |
531 | vmovdqu 48(%r11),%xmm5 |
532 | vpaddd 64(%r11),%xmm6,%xmm10 |
533 | vpaddd %xmm5,%xmm6,%xmm11 |
534 | vpaddd %xmm5,%xmm10,%xmm12 |
535 | vpshufb %xmm0,%xmm10,%xmm10 |
536 | vpaddd %xmm5,%xmm11,%xmm13 |
537 | vpshufb %xmm0,%xmm11,%xmm11 |
538 | vpxor %xmm4,%xmm10,%xmm10 |
539 | vpaddd %xmm5,%xmm12,%xmm14 |
540 | vpshufb %xmm0,%xmm12,%xmm12 |
541 | vpxor %xmm4,%xmm11,%xmm11 |
542 | vpaddd %xmm5,%xmm13,%xmm1 |
543 | vpshufb %xmm0,%xmm13,%xmm13 |
544 | vpxor %xmm4,%xmm12,%xmm12 |
545 | vpshufb %xmm0,%xmm14,%xmm14 |
546 | vpxor %xmm4,%xmm13,%xmm13 |
547 | vpshufb %xmm0,%xmm1,%xmm1 |
548 | vpxor %xmm4,%xmm14,%xmm14 |
549 | jmp .Loop_ctr32 |
550 | .cfi_endproc |
551 | .size _aesni_ctr32_6x,.-_aesni_ctr32_6x |
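# aesni_gcm_encrypt: bulk CTR encryption stitched with GHASH over the
# ciphertext it produces. Argument registers and return value as described
# for aesni_gcm_decrypt above (SysV order assumed). At least 288 bytes,
# i.e. three six-block bundles, are required: the first twelve blocks are
# encrypted up front with _aesni_ctr32_6x so the stitched loop always hashes
# ciphertext that already exists, and the blocks still pending at the end
# are hashed inline after the loop returns.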
552 | |
553 | .globl aesni_gcm_encrypt |
554 | .hidden aesni_gcm_encrypt |
555 | .type aesni_gcm_encrypt,@function |
556 | .align 32 |
557 | aesni_gcm_encrypt: |
558 | .cfi_startproc |
559 | #ifndef NDEBUG |
560 | #ifndef BORINGSSL_FIPS |
561 | .extern BORINGSSL_function_hit |
562 | .hidden BORINGSSL_function_hit |
563 | movb $1,BORINGSSL_function_hit+2(%rip) |
564 | #endif |
565 | #endif |
566 | xorq %r10,%r10 |
567 | |
568 | |
569 | |
570 | |
571 | cmpq $288,%rdx |
572 | jb .Lgcm_enc_abort |
573 | |
574 | leaq (%rsp),%rax |
575 | .cfi_def_cfa_register %rax |
576 | pushq %rbx |
577 | .cfi_offset %rbx,-16 |
578 | pushq %rbp |
579 | .cfi_offset %rbp,-24 |
580 | pushq %r12 |
581 | .cfi_offset %r12,-32 |
582 | pushq %r13 |
583 | .cfi_offset %r13,-40 |
584 | pushq %r14 |
585 | .cfi_offset %r14,-48 |
586 | pushq %r15 |
587 | .cfi_offset %r15,-56 |
588 | vzeroupper |
589 | |
590 | vmovdqu (%r8),%xmm1 |
591 | addq $-128,%rsp |
592 | movl 12(%r8),%ebx |
593 | leaq .Lbswap_mask(%rip),%r11 |
594 | leaq -128(%rcx),%r14 |
595 | movq $0xf80,%r15 |
596 | leaq 128(%rcx),%rcx |
597 | vmovdqu (%r11),%xmm0 |
598 | andq $-128,%rsp |
599 | movl 240-128(%rcx),%ebp |
600 | |
601 | andq %r15,%r14 |
602 | andq %rsp,%r15 |
603 | subq %r14,%r15 |
604 | jc .Lenc_no_key_aliasing |
605 | cmpq $768,%r15 |
606 | jnc .Lenc_no_key_aliasing |
607 | subq %r15,%rsp |
608 | .Lenc_no_key_aliasing: |
609 | |
610 | leaq (%rsi),%r14 |
611 | |
612 | |
613 | |
614 | |
615 | |
616 | |
617 | |
618 | |
619 | leaq -192(%rsi,%rdx,1),%r15 |
620 | |
621 | shrq $4,%rdx |
622 | |
623 | call _aesni_ctr32_6x |
624 | vpshufb %xmm0,%xmm9,%xmm8 |
625 | vpshufb %xmm0,%xmm10,%xmm2 |
626 | vmovdqu %xmm8,112(%rsp) |
627 | vpshufb %xmm0,%xmm11,%xmm4 |
628 | vmovdqu %xmm2,96(%rsp) |
629 | vpshufb %xmm0,%xmm12,%xmm5 |
630 | vmovdqu %xmm4,80(%rsp) |
631 | vpshufb %xmm0,%xmm13,%xmm6 |
632 | vmovdqu %xmm5,64(%rsp) |
633 | vpshufb %xmm0,%xmm14,%xmm7 |
634 | vmovdqu %xmm6,48(%rsp) |
635 | |
636 | call _aesni_ctr32_6x |
637 | |
638 | vmovdqu (%r9),%xmm8 |
639 | leaq 32+32(%r9),%r9 |
640 | subq $12,%rdx |
641 | movq $192,%r10 |
642 | vpshufb %xmm0,%xmm8,%xmm8 |
643 | |
644 | call _aesni_ctr32_ghash_6x |
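# The stitched loop hashes the ciphertext with a lag, so the digest is
# finished here: the bundle parked on the stack and the six blocks just
# produced are folded into the accumulator with an unrolled GHASH, the
# result is reduced by .Lpoly, byte-swapped, and stored back into Xi at
# -64(%r9).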
645 | vmovdqu 32(%rsp),%xmm7 |
646 | vmovdqu (%r11),%xmm0 |
647 | vmovdqu 0-32(%r9),%xmm3 |
648 | vpunpckhqdq %xmm7,%xmm7,%xmm1 |
649 | vmovdqu 32-32(%r9),%xmm15 |
650 | vmovups %xmm9,-96(%rsi) |
651 | vpshufb %xmm0,%xmm9,%xmm9 |
652 | vpxor %xmm7,%xmm1,%xmm1 |
653 | vmovups %xmm10,-80(%rsi) |
654 | vpshufb %xmm0,%xmm10,%xmm10 |
655 | vmovups %xmm11,-64(%rsi) |
656 | vpshufb %xmm0,%xmm11,%xmm11 |
657 | vmovups %xmm12,-48(%rsi) |
658 | vpshufb %xmm0,%xmm12,%xmm12 |
659 | vmovups %xmm13,-32(%rsi) |
660 | vpshufb %xmm0,%xmm13,%xmm13 |
661 | vmovups %xmm14,-16(%rsi) |
662 | vpshufb %xmm0,%xmm14,%xmm14 |
663 | vmovdqu %xmm9,16(%rsp) |
664 | vmovdqu 48(%rsp),%xmm6 |
665 | vmovdqu 16-32(%r9),%xmm0 |
666 | vpunpckhqdq %xmm6,%xmm6,%xmm2 |
667 | vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5 |
668 | vpxor %xmm6,%xmm2,%xmm2 |
669 | vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 |
670 | vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 |
671 | |
672 | vmovdqu 64(%rsp),%xmm9 |
673 | vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4 |
674 | vmovdqu 48-32(%r9),%xmm3 |
675 | vpxor %xmm5,%xmm4,%xmm4 |
676 | vpunpckhqdq %xmm9,%xmm9,%xmm5 |
677 | vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6 |
678 | vpxor %xmm9,%xmm5,%xmm5 |
679 | vpxor %xmm7,%xmm6,%xmm6 |
680 | vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 |
681 | vmovdqu 80-32(%r9),%xmm15 |
682 | vpxor %xmm1,%xmm2,%xmm2 |
683 | |
684 | vmovdqu 80(%rsp),%xmm1 |
685 | vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7 |
686 | vmovdqu 64-32(%r9),%xmm0 |
687 | vpxor %xmm4,%xmm7,%xmm7 |
688 | vpunpckhqdq %xmm1,%xmm1,%xmm4 |
689 | vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9 |
690 | vpxor %xmm1,%xmm4,%xmm4 |
691 | vpxor %xmm6,%xmm9,%xmm9 |
692 | vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5 |
693 | vpxor %xmm2,%xmm5,%xmm5 |
694 | |
695 | vmovdqu 96(%rsp),%xmm2 |
696 | vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6 |
697 | vmovdqu 96-32(%r9),%xmm3 |
698 | vpxor %xmm7,%xmm6,%xmm6 |
699 | vpunpckhqdq %xmm2,%xmm2,%xmm7 |
700 | vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1 |
701 | vpxor %xmm2,%xmm7,%xmm7 |
702 | vpxor %xmm9,%xmm1,%xmm1 |
703 | vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4 |
704 | vmovdqu 128-32(%r9),%xmm15 |
705 | vpxor %xmm5,%xmm4,%xmm4 |
706 | |
707 | vpxor 112(%rsp),%xmm8,%xmm8 |
708 | vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5 |
709 | vmovdqu 112-32(%r9),%xmm0 |
710 | vpunpckhqdq %xmm8,%xmm8,%xmm9 |
711 | vpxor %xmm6,%xmm5,%xmm5 |
712 | vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2 |
713 | vpxor %xmm8,%xmm9,%xmm9 |
714 | vpxor %xmm1,%xmm2,%xmm2 |
715 | vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7 |
716 | vpxor %xmm4,%xmm7,%xmm4 |
717 | |
718 | vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6 |
719 | vmovdqu 0-32(%r9),%xmm3 |
720 | vpunpckhqdq %xmm14,%xmm14,%xmm1 |
721 | vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8 |
722 | vpxor %xmm14,%xmm1,%xmm1 |
723 | vpxor %xmm5,%xmm6,%xmm5 |
724 | vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9 |
725 | vmovdqu 32-32(%r9),%xmm15 |
726 | vpxor %xmm2,%xmm8,%xmm7 |
727 | vpxor %xmm4,%xmm9,%xmm6 |
728 | |
729 | vmovdqu 16-32(%r9),%xmm0 |
730 | vpxor %xmm5,%xmm7,%xmm9 |
731 | vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4 |
732 | vpxor %xmm9,%xmm6,%xmm6 |
733 | vpunpckhqdq %xmm13,%xmm13,%xmm2 |
734 | vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14 |
735 | vpxor %xmm13,%xmm2,%xmm2 |
736 | vpslldq $8,%xmm6,%xmm9 |
737 | vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 |
738 | vpxor %xmm9,%xmm5,%xmm8 |
739 | vpsrldq $8,%xmm6,%xmm6 |
740 | vpxor %xmm6,%xmm7,%xmm7 |
741 | |
742 | vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5 |
743 | vmovdqu 48-32(%r9),%xmm3 |
744 | vpxor %xmm4,%xmm5,%xmm5 |
745 | vpunpckhqdq %xmm12,%xmm12,%xmm9 |
746 | vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13 |
747 | vpxor %xmm12,%xmm9,%xmm9 |
748 | vpxor %xmm14,%xmm13,%xmm13 |
749 | vpalignr $8,%xmm8,%xmm8,%xmm14 |
750 | vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 |
751 | vmovdqu 80-32(%r9),%xmm15 |
752 | vpxor %xmm1,%xmm2,%xmm2 |
753 | |
754 | vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4 |
755 | vmovdqu 64-32(%r9),%xmm0 |
756 | vpxor %xmm5,%xmm4,%xmm4 |
757 | vpunpckhqdq %xmm11,%xmm11,%xmm1 |
758 | vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12 |
759 | vpxor %xmm11,%xmm1,%xmm1 |
760 | vpxor %xmm13,%xmm12,%xmm12 |
761 | vxorps 16(%rsp),%xmm7,%xmm7 |
762 | vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9 |
763 | vpxor %xmm2,%xmm9,%xmm9 |
764 | |
765 | vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 |
766 | vxorps %xmm14,%xmm8,%xmm8 |
767 | |
768 | vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5 |
769 | vmovdqu 96-32(%r9),%xmm3 |
770 | vpxor %xmm4,%xmm5,%xmm5 |
771 | vpunpckhqdq %xmm10,%xmm10,%xmm2 |
772 | vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11 |
773 | vpxor %xmm10,%xmm2,%xmm2 |
774 | vpalignr $8,%xmm8,%xmm8,%xmm14 |
775 | vpxor %xmm12,%xmm11,%xmm11 |
776 | vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1 |
777 | vmovdqu 128-32(%r9),%xmm15 |
778 | vpxor %xmm9,%xmm1,%xmm1 |
779 | |
780 | vxorps %xmm7,%xmm14,%xmm14 |
781 | vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 |
782 | vxorps %xmm14,%xmm8,%xmm8 |
783 | |
784 | vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4 |
785 | vmovdqu 112-32(%r9),%xmm0 |
786 | vpxor %xmm5,%xmm4,%xmm4 |
787 | vpunpckhqdq %xmm8,%xmm8,%xmm9 |
788 | vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10 |
789 | vpxor %xmm8,%xmm9,%xmm9 |
790 | vpxor %xmm11,%xmm10,%xmm10 |
791 | vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2 |
792 | vpxor %xmm1,%xmm2,%xmm2 |
793 | |
794 | vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5 |
795 | vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7 |
796 | vpxor %xmm4,%xmm5,%xmm5 |
797 | vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6 |
798 | vpxor %xmm10,%xmm7,%xmm7 |
799 | vpxor %xmm2,%xmm6,%xmm6 |
800 | |
801 | vpxor %xmm5,%xmm7,%xmm4 |
802 | vpxor %xmm4,%xmm6,%xmm6 |
803 | vpslldq $8,%xmm6,%xmm1 |
804 | vmovdqu 16(%r11),%xmm3 |
805 | vpsrldq $8,%xmm6,%xmm6 |
806 | vpxor %xmm1,%xmm5,%xmm8 |
807 | vpxor %xmm6,%xmm7,%xmm7 |
808 | |
809 | vpalignr $8,%xmm8,%xmm8,%xmm2 |
810 | vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 |
811 | vpxor %xmm2,%xmm8,%xmm8 |
812 | |
813 | vpalignr $8,%xmm8,%xmm8,%xmm2 |
814 | vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 |
815 | vpxor %xmm7,%xmm2,%xmm2 |
816 | vpxor %xmm2,%xmm8,%xmm8 |
817 | vpshufb (%r11),%xmm8,%xmm8 |
818 | vmovdqu %xmm8,-64(%r9) |
819 | |
820 | vzeroupper |
821 | movq -48(%rax),%r15 |
822 | .cfi_restore %r15 |
823 | movq -40(%rax),%r14 |
824 | .cfi_restore %r14 |
825 | movq -32(%rax),%r13 |
826 | .cfi_restore %r13 |
827 | movq -24(%rax),%r12 |
828 | .cfi_restore %r12 |
829 | movq -16(%rax),%rbp |
830 | .cfi_restore %rbp |
831 | movq -8(%rax),%rbx |
832 | .cfi_restore %rbx |
833 | leaq (%rax),%rsp |
834 | .cfi_def_cfa_register %rsp |
835 | .Lgcm_enc_abort: |
836 | movq %r10,%rax |
837 | .byte 0xf3,0xc3 |
838 | .cfi_endproc |
839 | .size aesni_gcm_encrypt,.-aesni_gcm_encrypt |
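# Constant pool: .Lbswap_mask converts a block between wire byte order and
# the little-endian form used by the arithmetic above, .Lpoly holds the
# GHASH reduction constant (0xc2 in the top byte), and .Lone_msb, .Ltwo_lsb
# and .Lone_lsb are the counter increments used by the fast and slow counter
# paths.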
840 | .align 64 |
841 | .Lbswap_mask: |
842 | .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 |
843 | .Lpoly: |
844 | .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 |
845 | .Lone_msb: |
846 | .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 |
847 | .Ltwo_lsb: |
848 | .byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 |
849 | .Lone_lsb: |
850 | .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 |
851 | .byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
852 | .align 64 |
853 | #endif |
854 | |