dnl  AMD64 mpn_addaddmul_1msb0, R = Au + Bv, u,v < 2^63.

dnl  Origin: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/addaddmul_1msb0.asm

dnl  Copyright 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 2.167
C AMD K10	 2.167
C Intel P4	12.0
C Intel core2	 4.0
C Intel corei	 ?
C Intel atom	 ?
C VIA nano	 ?

C TODO
C  * Perhaps handle various n mod 3 sizes better.  The code now is too large.
C INPUT PARAMETERS
C  The six operands are taken from the registers that the System V AMD64
C  calling convention uses for the first six integer arguments.
define(`rp',       `%rdi')
define(`ap',       `%rsi')
define(`bp_param', `%rdx')
define(`n',        `%rcx')
define(`u0',       `%r8')
define(`v0',       `%r9')

C  Every mul overwrites %rdx, so the B pointer is moved out of bp_param and
C  kept in %rbp for the rest of the function.
define(`bp',       `%rbp')

C  Three accumulator words used in rotation: at any moment one holds the
C  result limb being completed, the next holds the carry into the following
C  limb, and the third is free to be cleared for the limb after that.
define(`w0',       `%r12')
define(`w1',       `%r10')
define(`w2',       `%r11')

C mpn_addaddmul_1msb0
C
C  Stores the n low limbs of  {ap,n} * u0 + {bp,n} * v0  at rp, working in
C  8-byte limbs, and returns the remaining most significant word in %rax.
C
C  Requirements visible in the code:
C   * n must be at least 1; the first A limb is loaded before n is tested.
C   * The high halves of two products plus carries are summed in a single
C     64-bit register with no further carry-out, which relies on the
C     u,v < 2^63 condition given in the file title.
C
C  Clobbers %rax, %rdx, %rcx, %rsi, %rdi, %r10, %r11 and the flags.
C  %r12 and %rbp are saved on entry and restored before every ret.
C
C  Layout: the loop retires three limbs per pass; the code after L(end)
C  finishes the last three, two or one limbs depending on n mod 3.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_addaddmul_1msb0)
	push	w0
	push	bp

C  Point all three vectors just past their last limb and turn n into a
C  negative index that counts up towards zero.
	lea	(ap,n,8), ap
	lea	(bp_param,n,8), bp
	lea	(rp,n,8), rp
	neg	n

C  Software pipeline start-up: w1:w0 = A[0] * u0, and B[0] is left in %rax
C  ready for the multiply that opens the loop or one of the tails.
	mov	(ap,n,8), %rax
	mul	u0
	mov	%rax, w0
	mov	(bp,n,8), %rax
	mov	%rdx, w1
	add	$3, n
	jns	L(end)			C three limbs or fewer: tails only

	ALIGN(16)
L(top):
C  Limb 0 of this group: add B * v0 to the pending A * u0 in w1:w0.
	mul	v0
	add	%rax, w0
	mov	-16(ap,n,8), %rax	C mov leaves CF alone for the adc
	adc	%rdx, w1
	mov	w0, -24(rp,n,8)

C  Limb 1: low word accumulates in w1, carry word is built in w2.
	mul	u0
	add	%rax, w1
	mov	-16(bp,n,8), %rax
	mov	$0, R32(w2)		C clear w2 with mov; xor would destroy CF
	adc	%rdx, w2
	mul	v0
	add	%rax, w1
	mov	-8(ap,n,8), %rax
	adc	%rdx, w2
	mov	w1, -16(rp,n,8)

C  Limb 2: low word accumulates in w2, carry word is built in w0.
	mul	u0
	add	%rax, w2
	mov	-8(bp,n,8), %rax
	mov	$0, R32(w0)
	adc	%rdx, w0
	mul	v0
	add	%rax, w2
	adc	%rdx, w0

C  Start limb 0 of the next group so that w1:w0 and %rax are in the same
C  state as on entry to L(top); the store of limb 2 is interleaved here.
	mov	(ap,n,8), %rax
	mul	u0
	add	%rax, w0
	mov	w2, -8(rp,n,8)
	mov	(bp,n,8), %rax
	mov	$0, R32(w1)
	adc	%rdx, w1
	add	$3, n
	js	L(top)

C  Here n is 0, 1 or 2 and 3 - n limbs are still to be written.
L(end):
	cmp	$1, R32(n)
	ja	L(tail1)		C n = 2: one limb left
	jz	L(tail2)		C n = 1: two limbs left

C  n = 0: three limbs left.  Same sequence as one loop pass, minus the
C  look-ahead, with fixed offsets from the end pointers.
	mul	v0
	add	%rax, w0
	mov	-16(ap), %rax
	adc	%rdx, w1
	mov	w0, -24(rp)
	mul	u0
	add	%rax, w1
	mov	-16(bp), %rax
	mov	$0, R32(w2)
	adc	%rdx, w2
	mul	v0
	add	%rax, w1
	mov	-8(ap), %rax
	adc	%rdx, w2
	mov	w1, -16(rp)
	mul	u0
	add	%rax, w2
	mov	-8(bp), %rax
	mov	$0, R32(w0)
	adc	%rdx, w0
	mul	v0
	add	%rax, w2
	adc	%rdx, w0
	mov	w2, -8(rp)
	mov	w0, %rax		C return the top word
	pop	bp
	pop	w0
	ret

C  Two limbs left.
L(tail2):
	mul	v0
	add	%rax, w0
	mov	-8(ap), %rax
	adc	%rdx, w1
	mov	w0, -16(rp)
	mul	u0
	add	%rax, w1
	mov	-8(bp), %rax
	mov	$0, R32(w2)
	adc	%rdx, w2
	mul	v0
	add	%rax, w1
	adc	%rdx, w2
	mov	w1, -8(rp)
	mov	w2, %rax		C return the top word
	pop	bp
	pop	w0
	ret

C  One limb left: only the pending B * v0 term remains to be added.
L(tail1):
	mul	v0
	add	%rax, w0
	mov	w0, -8(rp)
	adc	%rdx, w1
	mov	w1, %rax		C return the top word
	pop	bp
	pop	w0
	ret
EPILOGUE()