1 | # This file is generated from a similarly-named Perl script in the BoringSSL |
2 | # source tree. Do not edit by hand. |
3 | |
4 | #if defined(__has_feature) |
5 | #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) |
6 | #define OPENSSL_NO_ASM |
7 | #endif |
8 | #endif |
9 | |
10 | #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) |
11 | #if defined(BORINGSSL_PREFIX) |
12 | #include <boringssl_prefix_symbols_asm.h> |
13 | #endif |
14 | .text |
15 | |
# beeu_mod_inverse_vartime
#
# Register-level contract as read from the code below. rdi, rsi and rdx are
# the first three integer argument registers of the System V AMD64 ABI.
#   rdi  destination; four 64-bit words are stored through it on success
#   rsi  first operand; 32 bytes are copied from it into B
#   rdx  second operand, called n below; 32 bytes are copied from it into A,
#        and it is read again for the conditional additions and the final
#        reduction
#   rax  return value: 1 after the result is stored, 0 on the failure path
#
# NOTE(review): going by the name, this is presumably a variable-time modular
# inverse of the rsi operand modulo n via the binary extended Euclidean
# algorithm - confirm against the generating Perl script.
# NOTE(review): vmovdqu is an AVX instruction; presumably the caller checks
# for AVX support before calling - verify against caller.
#
# Working state:
#   X = rdi:r11:r10:r9:r8    five words, starts at 1
#   Y = rbp:r15:r14:r13:r12  five words, starts at 0
#   A = 16..47(%rsp)         four words
#   B = 48..79(%rsp)         four words
#   saved destination pointer at 0(%rsp)
16 | .type beeu_mod_inverse_vartime,@function |
17 | .hidden beeu_mod_inverse_vartime |
18 | .globl beeu_mod_inverse_vartime |
19 | .hidden beeu_mod_inverse_vartime |
20 | .align 32 |
21 | beeu_mod_inverse_vartime: |
22 | .cfi_startproc |
# Prologue: save rbp, r12-r15, rbx and rsi, with matching CFI records. All
# seven are popped in reverse order at .Lbeeu_finish.
23 | pushq %rbp |
24 | .cfi_adjust_cfa_offset 8 |
25 | .cfi_offset rbp,-16 |
26 | pushq %r12 |
27 | .cfi_adjust_cfa_offset 8 |
28 | .cfi_offset r12,-24 |
29 | pushq %r13 |
30 | .cfi_adjust_cfa_offset 8 |
31 | .cfi_offset r13,-32 |
32 | pushq %r14 |
33 | .cfi_adjust_cfa_offset 8 |
34 | .cfi_offset r14,-40 |
35 | pushq %r15 |
36 | .cfi_adjust_cfa_offset 8 |
37 | .cfi_offset r15,-48 |
38 | pushq %rbx |
39 | .cfi_adjust_cfa_offset 8 |
40 | .cfi_offset rbx,-56 |
41 | pushq %rsi |
42 | .cfi_adjust_cfa_offset 8 |
43 | .cfi_offset rsi,-64 |
44 | |
# Reserve 80 bytes of locals and stash the destination pointer, because rdi
# is reused below as the top word of X.
45 | subq $80,%rsp |
46 | .cfi_adjust_cfa_offset 80 |
47 | movq %rdi,0(%rsp) |
48 | |
49 | |
# X = 1
50 | movq $1,%r8 |
51 | xorq %r9,%r9 |
52 | xorq %r10,%r10 |
53 | xorq %r11,%r11 |
54 | xorq %rdi,%rdi |
55 | |
# Y = 0
56 | xorq %r12,%r12 |
57 | xorq %r13,%r13 |
58 | xorq %r14,%r14 |
59 | xorq %r15,%r15 |
60 | xorq %rbp,%rbp |
61 | |
62 | |
# B = the 32 bytes at rsi
63 | vmovdqu 0(%rsi),%xmm0 |
64 | vmovdqu 16(%rsi),%xmm1 |
65 | vmovdqu %xmm0,48(%rsp) |
66 | vmovdqu %xmm1,64(%rsp) |
67 | |
# A = the 32 bytes at rdx
68 | vmovdqu 0(%rdx),%xmm0 |
69 | vmovdqu 16(%rdx),%xmm1 |
70 | vmovdqu %xmm0,16(%rsp) |
71 | vmovdqu %xmm1,32(%rsp) |
72 | |
# Main loop: exit once all four words of B are zero.
73 | .Lbeeu_loop: |
74 | xorq %rbx,%rbx |
75 | orq 48(%rsp),%rbx |
76 | orq 56(%rsp),%rbx |
77 | orq 64(%rsp),%rbx |
78 | orq 72(%rsp),%rbx |
79 | jz .Lbeeu_loop_end |
80 | |
81 | |
82 | |
83 | |
84 | |
85 | |
86 | |
87 | |
88 | |
89 | |
# rcx is a one-bit mask that walks up the low word of B, starting at bit 0.
90 | movq $1,%rcx |
91 | |
92 | |
93 | .Lbeeu_shift_loop_XB: |
# Leave this inner loop at the first set bit of the low word of B.
94 | movq %rcx,%rbx |
95 | andq 48(%rsp),%rbx |
96 | jnz .Lbeeu_shift_loop_end_XB |
97 | |
98 | |
# Halve X. When X is odd, n is added first.
# NOTE(review): presumably this makes the sum even so no set bit is shifted
# out, which relies on n being odd - TODO confirm.
99 | movq $1,%rbx |
100 | andq %r8,%rbx |
101 | jz .Lshift1_0 |
102 | addq 0(%rdx),%r8 |
103 | adcq 8(%rdx),%r9 |
104 | adcq 16(%rdx),%r10 |
105 | adcq 24(%rdx),%r11 |
106 | adcq $0,%rdi |
107 | |
108 | .Lshift1_0: |
# X >>= 1 across all five words
109 | shrdq $1,%r9,%r8 |
110 | shrdq $1,%r10,%r9 |
111 | shrdq $1,%r11,%r10 |
112 | shrdq $1,%rdi,%r11 |
113 | shrq $1,%rdi |
114 | |
# Advance the mask to the next bit.
115 | shlq $1,%rcx |
116 | |
117 | |
118 | |
119 | |
120 | |
# Stop once the mask reaches 0x8000000 (1 << 27), so X is halved at most 27
# times per pass.
# NOTE(review): the reason for the bound of 27 is not visible here.
121 | cmpq $0x8000000,%rcx |
122 | jne .Lbeeu_shift_loop_XB |
123 | |
124 | .Lbeeu_shift_loop_end_XB: |
# Turn the mask into its bit index, which equals the number of halvings done
# above. Zero means B was already odd and needs no shift.
125 | bsfq %rcx,%rcx |
126 | testq %rcx,%rcx |
127 | jz .Lbeeu_no_shift_XB |
128 | |
129 | |
130 | |
# B >>= cl across four words. rax, rbx and rsi hold the three upper words so
# each shrd can pull bits in from the next higher word.
131 | movq 8+48(%rsp),%rax |
132 | movq 16+48(%rsp),%rbx |
133 | movq 24+48(%rsp),%rsi |
134 | |
135 | shrdq %cl,%rax,0+48(%rsp) |
136 | shrdq %cl,%rbx,8+48(%rsp) |
137 | shrdq %cl,%rsi,16+48(%rsp) |
138 | |
139 | shrq %cl,%rsi |
140 | movq %rsi,24+48(%rsp) |
141 | |
142 | |
143 | .Lbeeu_no_shift_XB: |
144 | |
# Same procedure for the pair Y and A: restart the one-bit mask at bit 0.
145 | movq $1,%rcx |
146 | |
147 | |
148 | .Lbeeu_shift_loop_YA: |
# Leave this inner loop at the first set bit of the low word of A.
149 | movq %rcx,%rbx |
150 | andq 16(%rsp),%rbx |
151 | jnz .Lbeeu_shift_loop_end_YA |
152 | |
153 | |
# Halve Y. When Y is odd, n is added first.
154 | movq $1,%rbx |
155 | andq %r12,%rbx |
156 | jz .Lshift1_1 |
157 | addq 0(%rdx),%r12 |
158 | adcq 8(%rdx),%r13 |
159 | adcq 16(%rdx),%r14 |
160 | adcq 24(%rdx),%r15 |
161 | adcq $0,%rbp |
162 | |
163 | .Lshift1_1: |
# Y >>= 1 across all five words
164 | shrdq $1,%r13,%r12 |
165 | shrdq $1,%r14,%r13 |
166 | shrdq $1,%r15,%r14 |
167 | shrdq $1,%rbp,%r15 |
168 | shrq $1,%rbp |
169 | |
# Advance the mask to the next bit.
170 | shlq $1,%rcx |
171 | |
172 | |
173 | |
174 | |
175 | |
# Same bound as above: stop once the mask reaches 1 << 27.
176 | cmpq $0x8000000,%rcx |
177 | jne .Lbeeu_shift_loop_YA |
178 | |
179 | .Lbeeu_shift_loop_end_YA: |
# Turn the mask into the number of halvings done; zero means no shift of A.
180 | bsfq %rcx,%rcx |
181 | testq %rcx,%rcx |
182 | jz .Lbeeu_no_shift_YA |
183 | |
184 | |
185 | |
# A >>= cl across four words, pulling bits in from the next higher word.
186 | movq 8+16(%rsp),%rax |
187 | movq 16+16(%rsp),%rbx |
188 | movq 24+16(%rsp),%rsi |
189 | |
190 | shrdq %cl,%rax,0+16(%rsp) |
191 | shrdq %cl,%rbx,8+16(%rsp) |
192 | shrdq %cl,%rsi,16+16(%rsp) |
193 | |
194 | shrq %cl,%rsi |
195 | movq %rsi,24+16(%rsp) |
196 | |
197 | |
198 | .Lbeeu_no_shift_YA: |
199 | |
# rcx:rsi:rbx:rax = B - A. No borrow means B >= A.
200 | movq 48(%rsp),%rax |
201 | movq 56(%rsp),%rbx |
202 | movq 64(%rsp),%rsi |
203 | movq 72(%rsp),%rcx |
204 | subq 16(%rsp),%rax |
205 | sbbq 24(%rsp),%rbx |
206 | sbbq 32(%rsp),%rsi |
207 | sbbq 40(%rsp),%rcx |
208 | jnc .Lbeeu_B_bigger_than_A |
209 | |
210 | |
# Borrow, so A > B: A = A - B
211 | movq 16(%rsp),%rax |
212 | movq 24(%rsp),%rbx |
213 | movq 32(%rsp),%rsi |
214 | movq 40(%rsp),%rcx |
215 | subq 48(%rsp),%rax |
216 | sbbq 56(%rsp),%rbx |
217 | sbbq 64(%rsp),%rsi |
218 | sbbq 72(%rsp),%rcx |
219 | movq %rax,16(%rsp) |
220 | movq %rbx,24(%rsp) |
221 | movq %rsi,32(%rsp) |
222 | movq %rcx,40(%rsp) |
223 | |
224 | |
# Y = Y + X
225 | addq %r8,%r12 |
226 | adcq %r9,%r13 |
227 | adcq %r10,%r14 |
228 | adcq %r11,%r15 |
229 | adcq %rdi,%rbp |
230 | jmp .Lbeeu_loop |
231 | |
232 | .Lbeeu_B_bigger_than_A: |
233 | |
# B = B - A, already computed in rcx:rsi:rbx:rax
234 | movq %rax,48(%rsp) |
235 | movq %rbx,56(%rsp) |
236 | movq %rsi,64(%rsp) |
237 | movq %rcx,72(%rsp) |
238 | |
239 | |
# X = X + Y
240 | addq %r12,%r8 |
241 | adcq %r13,%r9 |
242 | adcq %r14,%r10 |
243 | adcq %r15,%r11 |
244 | adcq %rbp,%rdi |
245 | |
246 | jmp .Lbeeu_loop |
247 | |
248 | .Lbeeu_loop_end: |
249 | |
250 | |
251 | |
252 | |
# B is zero here. Succeed only when A == 1: the low word minus one, OR-ed
# with the three upper words, must be zero.
253 | movq 16(%rsp),%rbx |
254 | subq $1,%rbx |
255 | orq 24(%rsp),%rbx |
256 | orq 32(%rsp),%rbx |
257 | orq 40(%rsp),%rbx |
258 | |
259 | jnz .Lbeeu_err |
260 | |
261 | |
262 | |
263 | |
# r11:r10:r9:r8 = n. rdi is cleared here and reloaded at .Lbeeu_save without
# being read in between.
264 | movq 0(%rdx),%r8 |
265 | movq 8(%rdx),%r9 |
266 | movq 16(%rdx),%r10 |
267 | movq 24(%rdx),%r11 |
268 | xorq %rdi,%rdi |
269 | |
270 | .Lbeeu_reduction_loop: |
# Keep a copy of Y in case the subtraction below borrows.
271 | movq %r12,16(%rsp) |
272 | movq %r13,24(%rsp) |
273 | movq %r14,32(%rsp) |
274 | movq %r15,40(%rsp) |
275 | movq %rbp,48(%rsp) |
276 | |
277 | |
# Y = Y - n over five words
278 | subq %r8,%r12 |
279 | sbbq %r9,%r13 |
280 | sbbq %r10,%r14 |
281 | sbbq %r11,%r15 |
282 | sbbq $0,%rbp |
283 | |
284 | |
# cmovc leaves the flags alone, so the cmovc group and the jnc all test the
# borrow of the last sbb. On borrow the low four words of Y are restored and
# the loop ends; otherwise the reduced Y is kept and the loop repeats.
# rbp is not restored and is not read again before it is popped.
285 | cmovcq 16(%rsp),%r12 |
286 | cmovcq 24(%rsp),%r13 |
287 | cmovcq 32(%rsp),%r14 |
288 | cmovcq 40(%rsp),%r15 |
289 | jnc .Lbeeu_reduction_loop |
290 | |
291 | |
# r11:r10:r9:r8 = n - Y
292 | subq %r12,%r8 |
293 | sbbq %r13,%r9 |
294 | sbbq %r14,%r10 |
295 | sbbq %r15,%r11 |
296 | |
297 | .Lbeeu_save: |
298 | |
# Store the four result words through the saved destination pointer.
299 | movq 0(%rsp),%rdi |
300 | |
301 | movq %r8,0(%rdi) |
302 | movq %r9,8(%rdi) |
303 | movq %r10,16(%rdi) |
304 | movq %r11,24(%rdi) |
305 | |
306 | |
# Success: return 1
307 | movq $1,%rax |
308 | jmp .Lbeeu_finish |
309 | |
310 | .Lbeeu_err: |
311 | |
# Failure path: return 0 and leave the destination untouched.
312 | xorq %rax,%rax |
313 | |
314 | .Lbeeu_finish: |
# Epilogue: release the locals and pop the saved registers in reverse order.
315 | addq $80,%rsp |
316 | .cfi_adjust_cfa_offset -80 |
317 | popq %rsi |
318 | .cfi_adjust_cfa_offset -8 |
319 | .cfi_restore rsi |
320 | popq %rbx |
321 | .cfi_adjust_cfa_offset -8 |
322 | .cfi_restore rbx |
323 | popq %r15 |
324 | .cfi_adjust_cfa_offset -8 |
325 | .cfi_restore r15 |
326 | popq %r14 |
327 | .cfi_adjust_cfa_offset -8 |
328 | .cfi_restore r14 |
329 | popq %r13 |
330 | .cfi_adjust_cfa_offset -8 |
331 | .cfi_restore r13 |
332 | popq %r12 |
333 | .cfi_adjust_cfa_offset -8 |
334 | .cfi_restore r12 |
335 | popq %rbp |
336 | .cfi_adjust_cfa_offset -8 |
337 | .cfi_restore rbp |
# The bytes 0xf3,0xc3 encode rep ret.
338 | .byte 0xf3,0xc3 |
339 | .cfi_endproc |
340 | |
341 | .size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime |
342 | #endif |
343 | |