| 1 | # This file is generated from a similarly-named Perl script in the BoringSSL |
| 2 | # source tree. Do not edit by hand. |
| 3 | |
| 4 | #if defined(__has_feature) |
| 5 | #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) |
| 6 | #define OPENSSL_NO_ASM |
| 7 | #endif |
| 8 | #endif |
| 9 | |
| 10 | #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) |
| 11 | #if defined(BORINGSSL_PREFIX) |
| 12 | #include <boringssl_prefix_symbols_asm.h> |
| 13 | #endif |
| 14 | .text |
| 15 | |
# int beeu_mod_inverse_vartime(uint64_t out[4], const uint64_t a[4],
#                              const uint64_t n[4]);
#
# Computes out = a^-1 (mod n) using the Binary Extended Euclidean
# ("BEEU") algorithm.  Variable-time, as the name states: branch and
# shift counts depend on the input values, so a and n must be public.
# Returns 1 in %rax on success, 0 if no inverse exists (gcd(a,n) != 1).
#
# ABI: SysV AMD64 (ELF).  In: %rdi = out, %rsi = a, %rdx = n; all are
# 256-bit values as four little-endian 64-bit limbs.  %rdx keeps
# pointing at n for the whole function body.
#
# Working state:
#   X = %rdi:%r11:%r10:%r9:%r8   five limbs, %r8 least significant
#                                (%rdi is recycled as X's overflow limb
#                                 once the out pointer is spilled)
#   Y = %rbp:%r15:%r14:%r13:%r12 five limbs, %r12 least significant
#   0(%rsp)          saved out pointer
#   16..40(%rsp)     A, initialized to n (four limbs)
#   48..72(%rsp)     B, initialized to a (four limbs)
#
# NOTE(review): the main loop appears to maintain X*a == B (mod n) and
# Y*a == -A (mod n) -- inferred from the final out = n - Y step below;
# confirm against the generating BoringSSL Perl source.
.type beeu_mod_inverse_vartime,@function
.hidden beeu_mod_inverse_vartime
.globl beeu_mod_inverse_vartime
.hidden beeu_mod_inverse_vartime    # duplicate of the directive two lines up; harmless generator artifact
.align 32
beeu_mod_inverse_vartime:
.cfi_startproc
# Save callee-saved registers.  %rsi is caller-saved in SysV, so the
# last push is not strictly needed for the ABI; NOTE(review): presumably
# emitted for uniformity with the Win64 flavour of this generated file.
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp,-16
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12,-24
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15,-48
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx,-56
pushq %rsi
.cfi_adjust_cfa_offset 8
.cfi_offset rsi,-64

subq $80,%rsp            # local frame: out ptr + A + B (layout above)
.cfi_adjust_cfa_offset 80
movq %rdi,0(%rsp)        # spill out pointer; %rdi now free for X's top limb

# X = 1 (five limbs).
movq $1,%r8
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
xorq %rdi,%rdi

# Y = 0 (five limbs).
xorq %r12,%r12
xorq %r13,%r13
xorq %r14,%r14
xorq %r15,%r15
xorq %rbp,%rbp

# B = a (two unaligned 128-bit copies through %xmm0/%xmm1).
vmovdqu 0(%rsi),%xmm0
vmovdqu 16(%rsi),%xmm1
vmovdqu %xmm0,48(%rsp)
vmovdqu %xmm1,64(%rsp)

# A = n.
vmovdqu 0(%rdx),%xmm0
vmovdqu 16(%rdx),%xmm1
vmovdqu %xmm0,16(%rsp)
vmovdqu %xmm1,32(%rsp)

.Lbeeu_loop:
# Outer loop: iterate while B != 0 (OR of all four limbs of B).
xorq %rbx,%rbx
orq 48(%rsp),%rbx
orq 56(%rsp),%rbx
orq 64(%rsp),%rbx
orq 72(%rsp),%rbx
jz .Lbeeu_loop_end

# Phase 1: strip trailing zero bits from B, halving X (mod n) in step:
#   while (B is even) { B >>= 1; if (X is odd) X += n; X >>= 1; }
# %rcx walks a one-hot probe mask over B's low limb; the number of
# trailing zeros handled is recovered below with bsf.
movq $1,%rcx


.Lbeeu_shift_loop_XB:
movq %rcx,%rbx
andq 48(%rsp),%rbx       # test the probe bit of B's low limb
jnz .Lbeeu_shift_loop_end_XB

# Halve X mod n: if X is odd, first add n so the right shift is exact.
movq $1,%rbx
andq %r8,%rbx
jz .Lshift1_0
addq 0(%rdx),%r8         # X += n, carry rippling into the fifth limb
adcq 8(%rdx),%r9
adcq 16(%rdx),%r10
adcq 24(%rdx),%r11
adcq $0,%rdi

.Lshift1_0:
shrdq $1,%r9,%r8         # X >>= 1 across all five limbs
shrdq $1,%r10,%r9
shrdq $1,%r11,%r10
shrdq $1,%rdi,%r11
shrq $1,%rdi

shlq $1,%rcx             # advance the probe bit

# Cap this inner pass at 27 shifts (0x8000000 == 1<<27) so %cl stays
# small for the shrd chain below; any remaining trailing zeros are
# handled on the next outer iteration.  NOTE(review): the exact choice
# of 2^27 is inherited from the generator -- confirm rationale in the
# BoringSSL Perl source.
cmpq $0x8000000,%rcx
jne .Lbeeu_shift_loop_XB

.Lbeeu_shift_loop_end_XB:
bsfq %rcx,%rcx           # %rcx = index of probe bit = shifts performed
testq %rcx,%rcx
jz .Lbeeu_no_shift_XB    # B was already odd; nothing to shift

# B >>= %cl, done in memory with a shrd chain (X was already halved
# %cl times in registers above).
movq 8+48(%rsp),%rax
movq 16+48(%rsp),%rbx
movq 24+48(%rsp),%rsi

shrdq %cl,%rax,0+48(%rsp)
shrdq %cl,%rbx,8+48(%rsp)
shrdq %cl,%rsi,16+48(%rsp)

shrq %cl,%rsi
movq %rsi,24+48(%rsp)    # top limb shifted separately (zero-filled)


.Lbeeu_no_shift_XB:

# Phase 2: identical trailing-zero stripping for A, halving Y (mod n).
movq $1,%rcx


.Lbeeu_shift_loop_YA:
movq %rcx,%rbx
andq 16(%rsp),%rbx       # test the probe bit of A's low limb
jnz .Lbeeu_shift_loop_end_YA

# Halve Y mod n: if Y is odd, first add n so the right shift is exact.
movq $1,%rbx
andq %r12,%rbx
jz .Lshift1_1
addq 0(%rdx),%r12        # Y += n, carry rippling into the fifth limb
adcq 8(%rdx),%r13
adcq 16(%rdx),%r14
adcq 24(%rdx),%r15
adcq $0,%rbp

.Lshift1_1:
shrdq $1,%r13,%r12       # Y >>= 1 across all five limbs
shrdq $1,%r14,%r13
shrdq $1,%r15,%r14
shrdq $1,%rbp,%r15
shrq $1,%rbp

shlq $1,%rcx             # advance the probe bit

# Same 27-shift cap as the X/B loop above.
cmpq $0x8000000,%rcx
jne .Lbeeu_shift_loop_YA

.Lbeeu_shift_loop_end_YA:
bsfq %rcx,%rcx           # %rcx = shifts performed on this pass
testq %rcx,%rcx
jz .Lbeeu_no_shift_YA    # A was already odd; nothing to shift

# A >>= %cl, in memory, mirroring the B shift above.
movq 8+16(%rsp),%rax
movq 16+16(%rsp),%rbx
movq 24+16(%rsp),%rsi

shrdq %cl,%rax,0+16(%rsp)
shrdq %cl,%rbx,8+16(%rsp)
shrdq %cl,%rsi,16+16(%rsp)

shrq %cl,%rsi
movq %rsi,24+16(%rsp)


.Lbeeu_no_shift_YA:
# Phase 3: both A and B are now odd.  Compute B - A to compare them.
movq 48(%rsp),%rax
movq 56(%rsp),%rbx
movq 64(%rsp),%rsi
movq 72(%rsp),%rcx
subq 16(%rsp),%rax
sbbq 24(%rsp),%rbx
sbbq 32(%rsp),%rsi
sbbq 40(%rsp),%rcx
jnc .Lbeeu_B_bigger_than_A   # no borrow: B >= A

# B < A:  A -= B;  Y += X.
movq 16(%rsp),%rax
movq 24(%rsp),%rbx
movq 32(%rsp),%rsi
movq 40(%rsp),%rcx
subq 48(%rsp),%rax
sbbq 56(%rsp),%rbx
sbbq 64(%rsp),%rsi
sbbq 72(%rsp),%rcx
movq %rax,16(%rsp)
movq %rbx,24(%rsp)
movq %rsi,32(%rsp)
movq %rcx,40(%rsp)

# Y += X (five-limb add; Y may temporarily exceed n, reduced at the end).
addq %r8,%r12
adcq %r9,%r13
adcq %r10,%r14
adcq %r11,%r15
adcq %rdi,%rbp
jmp .Lbeeu_loop

.Lbeeu_B_bigger_than_A:
# B >= A:  B = B - A (difference already in rax/rbx/rsi/rcx);  X += Y.
movq %rax,48(%rsp)
movq %rbx,56(%rsp)
movq %rsi,64(%rsp)
movq %rcx,72(%rsp)

# X += Y (five-limb add).
addq %r12,%r8
adcq %r13,%r9
adcq %r14,%r10
adcq %r15,%r11
adcq %rbp,%rdi

jmp .Lbeeu_loop

.Lbeeu_loop_end:
# B == 0.  An inverse exists iff the surviving gcd A == 1:
# rbx = (A[0]-1) | A[1] | A[2] | A[3]  is zero exactly when A == 1.
movq 16(%rsp),%rbx
subq $1,%rbx
orq 24(%rsp),%rbx
orq 32(%rsp),%rbx
orq 40(%rsp),%rbx

jnz .Lbeeu_err

# Load n into r8..r11 for the reduction and the final subtraction.
movq 0(%rdx),%r8
movq 8(%rdx),%r9
movq 16(%rdx),%r10
movq 24(%rdx),%r11
xorq %rdi,%rdi           # return-value scratch; cleared before reuse below

.Lbeeu_reduction_loop:
# Reduce the five-limb Y modulo n by repeated subtraction.
# Snapshot Y first (A's stack slots are dead now; 48(%rsp) likewise
# reuses B's first slot for the fifth limb).
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq %r14,32(%rsp)
movq %r15,40(%rsp)
movq %rbp,48(%rsp)

# Y -= n (five-limb subtract).
subq %r8,%r12
sbbq %r9,%r13
sbbq %r10,%r14
sbbq %r11,%r15
sbbq $0,%rbp

# A borrow means Y went negative: undo via the snapshot and stop.
# (%rbp is not restored; it is dead after the loop.)
cmovcq 16(%rsp),%r12
cmovcq 24(%rsp),%r13
cmovcq 32(%rsp),%r14
cmovcq 40(%rsp),%r15
jnc .Lbeeu_reduction_loop

# out = n - Y.  %r8..%r11 still hold n; per the invariant note above,
# Y == -a^-1 (mod n) at this point.
subq %r12,%r8
sbbq %r13,%r9
sbbq %r14,%r10
sbbq %r15,%r11

.Lbeeu_save:
# Store the 256-bit result through the saved out pointer.
movq 0(%rsp),%rdi

movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)

# Success.
movq $1,%rax
jmp .Lbeeu_finish

.Lbeeu_err:
# gcd(a, n) != 1: no inverse exists; return 0.
xorq %rax,%rax

.Lbeeu_finish:
addq $80,%rsp
.cfi_adjust_cfa_offset -80
popq %rsi
.cfi_adjust_cfa_offset -8
.cfi_restore rsi
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore rbx
popq %r15
.cfi_adjust_cfa_offset -8
.cfi_restore r15
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore r13
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore r12
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore rbp
.byte 0xf3,0xc3          # rep ret -- NOTE(review): presumably the usual
                         # AMD branch-predictor workaround; emitted as
                         # raw bytes by the generator
.cfi_endproc

.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
| 342 | #endif |
| 343 | |