# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

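# beeu_mod_inverse_vartime computes a modular inverse using the Binary
# Extended Euclidean (BEEU) algorithm. It appears to correspond to a C
# declaration along the lines of (an editorial sketch, not a symbol defined
# in this file):
#
#	int beeu_mod_inverse_vartime(uint64_t out[4], const uint64_t a[4],
#	                             const uint64_t n[4]);
#
# i.e. it sets out = a^-1 mod n and returns 1, or returns 0 when no inverse
# exists (gcd(a, n) != 1). As the name states, it runs in variable time.
#
# Rough shape of the code below, as editorial pseudocode:
#
#	X = 1; Y = 0; B = a; A = n;
#	while (B != 0) {
#		while (B is even) { B >>= 1; if (X is odd) X += n; X >>= 1; }
#		while (A is even) { A >>= 1; if (Y is odd) Y += n; Y >>= 1; }
#		(B >= A) ? (B -= A, X += Y) : (A -= B, Y += X);
#	}
#	A != 1 means gcd(a, n) != 1, so no inverse exists: return 0.
#	Otherwise Y*a == -1 (mod n), so out = n - (Y mod n), return 1.
#
# In the comments below, X names the 320-bit value in %r8,%r9,%r10,%r11,%rdi,
# Y the 320-bit value in %r12,%r13,%r14,%r15,%rbp, B the 256-bit value at
# 48(%rsp) and A the 256-bit value at 16(%rsp). These labels are editorial.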
.type	beeu_mod_inverse_vartime,@function
.hidden	beeu_mod_inverse_vartime
.globl	beeu_mod_inverse_vartime
.hidden	beeu_mod_inverse_vartime
.align	32
beeu_mod_inverse_vartime:
.cfi_startproc
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	rbp,-16
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	r12,-24
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	r13,-32
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	r14,-40
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	r15,-48
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	rbx,-56
	pushq	%rsi
.cfi_adjust_cfa_offset	8
.cfi_offset	rsi,-64

	subq	$80,%rsp
.cfi_adjust_cfa_offset	80
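
# Save the output pointer (the first argument); %rdi is reused below as the
# top word of X.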
	movq	%rdi,0(%rsp)

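# X = 1 and Y = 0, each held in five 64-bit words (four words plus a fifth
# word for overflow).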
	movq	$1,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%rdi,%rdi

	xorq	%r12,%r12
	xorq	%r13,%r13
	xorq	%r14,%r14
	xorq	%r15,%r15
	xorq	%rbp,%rbp

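# Copy the inputs onto the stack: B = |a| (from %rsi) at 48(%rsp) and
# A = |n| (from %rdx) at 16(%rsp).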
	vmovdqu	0(%rsi),%xmm0
	vmovdqu	16(%rsi),%xmm1
	vmovdqu	%xmm0,48(%rsp)
	vmovdqu	%xmm1,64(%rsp)

	vmovdqu	0(%rdx),%xmm0
	vmovdqu	16(%rdx),%xmm1
	vmovdqu	%xmm0,16(%rsp)
	vmovdqu	%xmm1,32(%rsp)

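# Main loop: iterate while B != 0.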
.Lbeeu_loop:
	xorq	%rbx,%rbx
	orq	48(%rsp),%rbx
	orq	56(%rsp),%rbx
	orq	64(%rsp),%rbx
	orq	72(%rsp),%rbx
	jz	.Lbeeu_loop_end

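# Strip trailing zero bits from B. For every factor of two removed from B,
# X must be halved as well; %rcx is a moving single-bit mask that both probes
# the low word of B and counts how many shifts are needed.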
	movq	$1,%rcx

.Lbeeu_shift_loop_XB:
	movq	%rcx,%rbx
	andq	48(%rsp),%rbx
	jnz	.Lbeeu_shift_loop_end_XB

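# Halve X. When X is odd, first add n so the division by two is exact
# (the values stay congruent mod n).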
	movq	$1,%rbx
	andq	%r8,%rbx
	jz	.Lshift1_0
	addq	0(%rdx),%r8
	adcq	8(%rdx),%r9
	adcq	16(%rdx),%r10
	adcq	24(%rdx),%r11
	adcq	$0,%rdi

.Lshift1_0:
	shrdq	$1,%r9,%r8
	shrdq	$1,%r10,%r9
	shrdq	$1,%r11,%r10
	shrdq	$1,%rdi,%r11
	shrq	$1,%rdi

	shlq	$1,%rcx

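# Each pass handles at most 27 trailing zero bits (%rcx stops at 1<<27); any
# further factors of two are removed on a later iteration of the outer loop.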
	cmpq	$0x8000000,%rcx
	jne	.Lbeeu_shift_loop_XB

.Lbeeu_shift_loop_end_XB:
	bsfq	%rcx,%rcx
	testq	%rcx,%rcx
	jz	.Lbeeu_no_shift_XB

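# Shift B right by the number of bits counted above (now an index in %cl).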
	movq	8+48(%rsp),%rax
	movq	16+48(%rsp),%rbx
	movq	24+48(%rsp),%rsi

	shrdq	%cl,%rax,0+48(%rsp)
	shrdq	%cl,%rbx,8+48(%rsp)
	shrdq	%cl,%rsi,16+48(%rsp)

	shrq	%cl,%rsi
	movq	%rsi,24+48(%rsp)

.Lbeeu_no_shift_XB:

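# Now do the same for A and Y: strip trailing zero bits from A and halve Y
# correspondingly.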
	movq	$1,%rcx

.Lbeeu_shift_loop_YA:
	movq	%rcx,%rbx
	andq	16(%rsp),%rbx
	jnz	.Lbeeu_shift_loop_end_YA

	movq	$1,%rbx
	andq	%r12,%rbx
	jz	.Lshift1_1
	addq	0(%rdx),%r12
	adcq	8(%rdx),%r13
	adcq	16(%rdx),%r14
	adcq	24(%rdx),%r15
	adcq	$0,%rbp

.Lshift1_1:
	shrdq	$1,%r13,%r12
	shrdq	$1,%r14,%r13
	shrdq	$1,%r15,%r14
	shrdq	$1,%rbp,%r15
	shrq	$1,%rbp

	shlq	$1,%rcx

	cmpq	$0x8000000,%rcx
	jne	.Lbeeu_shift_loop_YA

.Lbeeu_shift_loop_end_YA:
	bsfq	%rcx,%rcx
	testq	%rcx,%rcx
	jz	.Lbeeu_no_shift_YA

	movq	8+16(%rsp),%rax
	movq	16+16(%rsp),%rbx
	movq	24+16(%rsp),%rsi

	shrdq	%cl,%rax,0+16(%rsp)
	shrdq	%cl,%rbx,8+16(%rsp)
	shrdq	%cl,%rsi,16+16(%rsp)

	shrq	%cl,%rsi
	movq	%rsi,24+16(%rsp)

.Lbeeu_no_shift_YA:

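# Compute B - A. When the subtraction does not borrow (B >= A), the
# difference becomes the new B below; otherwise A - B is computed instead.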
	movq	48(%rsp),%rax
	movq	56(%rsp),%rbx
	movq	64(%rsp),%rsi
	movq	72(%rsp),%rcx
	subq	16(%rsp),%rax
	sbbq	24(%rsp),%rbx
	sbbq	32(%rsp),%rsi
	sbbq	40(%rsp),%rcx
	jnc	.Lbeeu_B_bigger_than_A

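# B < A: set A = A - B and Y = Y + X.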
	movq	16(%rsp),%rax
	movq	24(%rsp),%rbx
	movq	32(%rsp),%rsi
	movq	40(%rsp),%rcx
	subq	48(%rsp),%rax
	sbbq	56(%rsp),%rbx
	sbbq	64(%rsp),%rsi
	sbbq	72(%rsp),%rcx
	movq	%rax,16(%rsp)
	movq	%rbx,24(%rsp)
	movq	%rsi,32(%rsp)
	movq	%rcx,40(%rsp)

	addq	%r8,%r12
	adcq	%r9,%r13
	adcq	%r10,%r14
	adcq	%r11,%r15
	adcq	%rdi,%rbp
	jmp	.Lbeeu_loop

.Lbeeu_B_bigger_than_A:

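# B >= A: set B = B - A (already computed above) and X = X + Y.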
	movq	%rax,48(%rsp)
	movq	%rbx,56(%rsp)
	movq	%rsi,64(%rsp)
	movq	%rcx,72(%rsp)

	addq	%r12,%r8
	adcq	%r13,%r9
	adcq	%r14,%r10
	adcq	%r15,%r11
	adcq	%rbp,%rdi

	jmp	.Lbeeu_loop

.Lbeeu_loop_end:

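# The loop ended with B == 0, so A now holds gcd(a, n). The inverse exists
# only when that gcd is 1.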
	movq	16(%rsp),%rbx
	subq	$1,%rbx
	orq	24(%rsp),%rbx
	orq	32(%rsp),%rbx
	orq	40(%rsp),%rbx

	jnz	.Lbeeu_err

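# gcd == 1: produce the result. Load n, reduce Y below n by repeated
# subtraction, then store n - Y to out.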
	movq	0(%rdx),%r8
	movq	8(%rdx),%r9
	movq	16(%rdx),%r10
	movq	24(%rdx),%r11
	xorq	%rdi,%rdi

.Lbeeu_reduction_loop:
	movq	%r12,16(%rsp)
	movq	%r13,24(%rsp)
	movq	%r14,32(%rsp)
	movq	%r15,40(%rsp)
	movq	%rbp,48(%rsp)

	subq	%r8,%r12
	sbbq	%r9,%r13
	sbbq	%r10,%r14
	sbbq	%r11,%r15
	sbbq	$0,%rbp

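# When the subtraction borrowed, Y was already smaller than n: restore the
# saved copy and leave the reduction loop.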
	cmovcq	16(%rsp),%r12
	cmovcq	24(%rsp),%r13
	cmovcq	32(%rsp),%r14
	cmovcq	40(%rsp),%r15
	jnc	.Lbeeu_reduction_loop

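# out = n - Y: per the sketch above, Y*a == -1 (mod n), so n - Y is the
# inverse. The output pointer saved at 0(%rsp) is reloaded into %rdi before
# the stores.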
	subq	%r12,%r8
	sbbq	%r13,%r9
	sbbq	%r14,%r10
	sbbq	%r15,%r11

.Lbeeu_save:

	movq	0(%rsp),%rdi

	movq	%r8,0(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

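# Return 1 to signal success.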
	movq	$1,%rax
	jmp	.Lbeeu_finish

.Lbeeu_err:

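# gcd(a, n) != 1: no inverse exists, return 0.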
	xorq	%rax,%rax

.Lbeeu_finish:
	addq	$80,%rsp
.cfi_adjust_cfa_offset	-80
	popq	%rsi
.cfi_adjust_cfa_offset	-8
.cfi_restore	rsi
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	rbx
	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	rbp
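# 0xf3,0xc3 is the encoding of "rep ret", i.e. a return with a rep prefix,
# emitted here as raw bytes.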
	.byte	0xf3,0xc3
.cfi_endproc

.size	beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
#endif