dnl  AMD64 mpn_mul_2 optimised for AMD Bulldozer.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2008, 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9
C AMD K10
C AMD bull	 4.36		average, quite fluctuating
C AMD pile	 4.38		slightly fluctuating
C AMD steam
C AMD bobcat
C AMD jaguar
C Intel P4
C Intel core
C Intel NHM
C Intel SBR
C Intel IBR
C Intel HWL
C Intel BWL
C Intel atom
C VIA nano

C The loop of this code is the result of running a code generation and
C optimisation tool suite written by David Harvey and Torbjorn Granlund.
C Scheme: genxmul --mul

C ---------------------------------------------------------------------------
C mpn_mul_2
C
C Multiplies the n_param-limb operand at up by the 2-limb operand at vp
C (v0 = vp[0], v1 = vp[1]).  The low n_param+1 limbs of the product are
C stored at rp[0 .. n_param]; the most significant limb is returned in rax.
C
C In:     rp, up, n_param, vp as defined below.  n_param must be >= 1.
C Out:    rax = most significant product limb.
C Saved:  rbx and rbp are pushed on entry and popped before returning.
C Syntax: AT&T operand order, run through m4 with the macros of config.m4.
C
C The register named in the comment to the right of each argument define is
C the matching Microsoft x64 argument register.  NOTE(review): presumably
C FUNC_ENTRY(4) moves those into the registers used here when building for
C DOS64 -- confirm against the macro definition.
C
C vp and w1 share rcx.  This is safe because both limbs of vp are loaded
C into v0/v1 before w1 is written for the first time.
C ---------------------------------------------------------------------------

define(`rp',      `%rdi')   C rcx
define(`up',      `%rsi')   C rdx
define(`n_param', `%rdx')   C r8
define(`vp',      `%rcx')   C r9

define(`v0', `%r8')
define(`v1', `%r9')
define(`w0', `%rbx')
define(`w1', `%rcx')
define(`w2', `%rbp')
define(`w3', `%r10')
define(`n',  `%r11')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_mul_2)
	FUNC_ENTRY(4)
	push	%rbx
	push	%rbp

	mov	(up), %rax		C rax = up[0], first multiplicand limb

	mov	(vp), v0
	mov	8(vp), v1		C last use of vp; rcx is free for w1 now

	lea	(up,n_param,8), up	C point up and rp just past limb n_param-1
	lea	(rp,n_param,8), rp	C so they can be indexed by the negative n

	mov	n_param, n		C copy the count before mul overwrites rdx
	mul	v0			C rdx:rax = up[0] * v0
	neg	n			C n = -n_param, counts upwards towards 0

C The loop below is unrolled four ways.  The two low bits of n (that is, of
C -n_param) pick the point at which the unrolled loop is entered.

	test	$1, R8(n)
	jnz	L(bx1)

L(bx0):	test	$2, R8(n)
	jnz	L(b10)

L(b00):	mov	%rax, w0		C n_param = 0 mod 4
	mov	%rdx, w1
	xor	R32(w2), R32(w2)
	mov	(up,n,8), %rax
	jmp	L(lo0)

L(b10):	mov	%rax, w2		C n_param = 2 mod 4
	mov	%rdx, w3
	mov	(up,n,8), %rax
	xor	R32(w0), R32(w0)
	mul	v1
	add	$-2, n
	jmp	L(lo2)

L(bx1):	test	$2, R8(n)
	jz	L(b11)

L(b01):	mov	%rax, w3		C n_param = 1 mod 4
	mov	%rdx, w0
	mov	(up,n,8), %rax
	mul	v1
	xor	R32(w1), R32(w1)
	inc	n			C sets ZF exactly when n_param was 1
	jnz	L(lo1)
	C n_param == 1: the single limb has already been multiplied by both v0
	C and v1.  Entering the loop here would read beyond up and the exit
	C test (carry out of add $4, n) would not fire, so go straight to the
	C final two stores instead.
	jmp	L(fin)

L(b11):	mov	%rax, w1		C n_param = 3 mod 4
	mov	%rdx, w2
	mov	(up,n,8), %rax
	xor	R32(w3), R32(w3)
	dec	n
	jmp	L(lo3)

C Main loop.  w0..w3 form a rotating accumulator window: each product limb
C is stored to rp once both the v0 and the v1 partial product that affect it
C have been added in.  Several of the "mov $0" instructions sit between an
C add and the adc that consumes its carry; mov is used for zeroing because
C it leaves the flags alone.  Each pass consumes four limbs of up; the loop
C is left when "add $4, n" carries, i.e. when n reaches 0.

	ALIGN(32)
L(top):	mov	-8(up,n,8), %rax
	mul	v1
	mov	w2, -16(rp,n,8)
L(lo1):	add	%rax, w0
	mov	w3, -8(rp,n,8)
	adc	%rdx, w1
	mov	(up,n,8), %rax
	mul	v0
	mov	$0, R32(w2)
	add	%rax, w0
	adc	%rdx, w1
	adc	$0, R32(w2)
	mov	(up,n,8), %rax
L(lo0):	mul	v1
	add	%rax, w1
	adc	%rdx, w2
	mov	8(up,n,8), %rax
	mul	v0
	add	%rax, w1
	mov	w0, (rp,n,8)
	mov	$0, R32(w3)		C must not disturb CF from the add above
	mov	8(up,n,8), %rax
	adc	%rdx, w2
	adc	$0, R32(w3)
L(lo3):	mul	v1
	add	%rax, w2
	mov	16(up,n,8), %rax
	adc	%rdx, w3
	mul	v0
	add	%rax, w2
	mov	16(up,n,8), %rax
	mov	$0, R32(w0)		C must not disturb CF from the add above
	adc	%rdx, w3
	adc	$0, R32(w0)
	mul	v1
	mov	w1, 8(rp,n,8)
L(lo2):	add	%rax, w3
	adc	%rdx, w0
	mov	24(up,n,8), %rax
	mul	v0
	add	%rax, w3
	adc	%rdx, w0
	mov	$0, R32(w1)		C must not disturb CF from the adc above
	adc	$0, R32(w1)
	add	$4, n
	jnc	L(top)

C Wind-down: n is 0 here.  Multiply the last limb of up by v1, flush the
C three pending result limbs and hand back the top limb.

L(end):	mov	-8(up,n,8), %rax
	mul	v1
	mov	w2, -16(rp,n,8)
L(fin):	add	%rax, w0		C also the entry point for n_param == 1
	mov	w3, -8(rp,n,8)
	adc	%rdx, w1
	mov	w0, (rp,n,8)
	mov	w1, %rax		C return value: most significant limb

	pop	%rbp
	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()