/* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
   the result to a second limb vector.
   Copyright (C) 2003-2020 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

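/* For reference, the operation implemented here is, in rough C (an
   illustrative sketch only, assuming 64-bit limbs and using unsigned
   __int128 to stand in for the 128-bit result of the MUL instruction):

     mp_limb_t
     __mpn_addmul_1 (mp_limb_t *rp, const mp_limb_t *up,
                     mp_size_t n, mp_limb_t v0)
     {
       mp_limb_t cy = 0;
       for (mp_size_t i = 0; i < n; i++)
         {
           unsigned __int128 p = (unsigned __int128) up[i] * v0 + rp[i] + cy;
           rp[i] = (mp_limb_t) p;
           cy = (mp_limb_t) (p >> 64);
         }
       return cy;
     }
*/
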
#define rp	%rdi		/* Result/addend vector (1st argument).  */
#define up	%rsi		/* Multiplicand vector (2nd argument).  */
#define n	%rdx		/* Limb count (3rd argument).  */
#define v0	%rcx		/* Single multiplier limb (4th argument).  */

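/* submul_1.S reuses this file: it predefines func as __mpn_submul_1 and
   ADDSUB as sub before including it, turning the limb additions below
   into subtractions.  */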
#ifndef func
# define func __mpn_addmul_1
# define ADDSUB add
#endif

	.text
ENTRY (func)
	push	%rbx
	push	%rbp
	lea	(%rdx), %rbx
	neg	%rbx			/* %rbx = -n; it counts up towards zero.  */

	mov	(up), %rax		/* up[0], first multiplicand limb.  */
	mov	(rp), %r10		/* rp[0], first addend limb.  */

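	/* Advance rp and up past their vectors (rp stops 16 bytes short,
	   since the wind-down code addresses the last limbs with fixed
	   offsets -8(rp)..8(rp)).  The negative index in %rbx then walks
	   both vectors forward, so loop termination is a simple sign test.  */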
	lea	-16(rp,%rdx,8), rp
	lea	(up,%rdx,8), up
	mul	%rcx			/* %rdx:%rax = up[0] * v0.  */

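	/* The low bit of -n equals the low bit of n, so this dispatches on
	   the parity of n; the loop below is unrolled two limbs deep.  */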
	bt	$0, %ebx
	jc	L(odd)

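	/* Even n: the first product is already in %rdx:%rax.  Save it,
	   start up[1] * v0, and either finish at L(n2) when n == 2 or
	   queue up[2] and jump into the loop at L(mid).  */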
	lea	(%rax), %r11
	mov	8(up,%rbx,8), %rax
	lea	(%rdx), %rbp
	mul	%rcx
	add	$2, %rbx
	jns	L(n2)

	lea	(%rax), %r8
	mov	(up,%rbx,8), %rax
	lea	(%rdx), %r9
	jmp	L(mid)

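	/* Odd n: with n == 1 the single product is finished at L(n1);
	   otherwise compute the first two products and enter the loop
	   at L(e).  */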
L(odd):	add	$1, %rbx
	jns	L(n1)

	lea	(%rax), %r8
	mov	(up,%rbx,8), %rax
	lea	(%rdx), %r9
	mul	%rcx
	lea	(%rax), %r11
	mov	8(up,%rbx,8), %rax
	lea	(%rdx), %rbp
	jmp	L(e)

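/* Main loop, two limbs per iteration.  Two products are kept in flight:
   each half multiplies one limb, folds the previously computed product
   into rp[], and propagates the carry, alternating between the register
   pairs %r8/%r9 and %r11/%rbp.  */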
	.p2align 4
L(top):	mul	%rcx
	ADDSUB	%r8, %r10
	lea	(%rax), %r8
	mov	(up,%rbx,8), %rax
	adc	%r9, %r11
	mov	%r10, -8(rp,%rbx,8)
	mov	(rp,%rbx,8), %r10
	lea	(%rdx), %r9
	adc	$0, %rbp
L(mid):	mul	%rcx
	ADDSUB	%r11, %r10
	lea	(%rax), %r11
	mov	8(up,%rbx,8), %rax
	adc	%rbp, %r8
	mov	%r10, (rp,%rbx,8)
	mov	8(rp,%rbx,8), %r10
	lea	(%rdx), %rbp
	adc	$0, %r9
L(e):	add	$2, %rbx
	js	L(top)

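	/* Loop done (%rbx == 0).  Fold the two outstanding products into
	   the last limbs of rp[] and return the final carry in %rax.  */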
	mul	%rcx
	ADDSUB	%r8, %r10
	adc	%r9, %r11
	mov	%r10, -8(rp)
	adc	$0, %rbp
L(n2):	mov	(rp), %r10
	ADDSUB	%r11, %r10
	adc	%rbp, %rax
	mov	%r10, (rp)
	adc	$0, %rdx
L(n1):	mov	8(rp), %r10
	ADDSUB	%rax, %r10
	mov	%r10, 8(rp)
	mov	%ebx, %eax	/* %rbx is zero here; clear %rax without
				   touching the carry flag.  */
	adc	%rdx, %rax	/* Fold in the last carry; %rax is returned.  */
	pop	%rbp
	pop	%rbx
	ret
END (func)