github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/atom/addmul_2.asm (about) 1 dnl AMD64 mpn_addmul_2 optimised for Intel Atom. 2 3 dnl Copyright 2008, 2011-2013 Free Software Foundation, Inc. 4 5 dnl This file is part of the GNU MP Library. 6 dnl 7 dnl The GNU MP Library is free software; you can redistribute it and/or modify 8 dnl it under the terms of either: 9 dnl 10 dnl * the GNU Lesser General Public License as published by the Free 11 dnl Software Foundation; either version 3 of the License, or (at your 12 dnl option) any later version. 13 dnl 14 dnl or 15 dnl 16 dnl * the GNU General Public License as published by the Free Software 17 dnl Foundation; either version 2 of the License, or (at your option) any 18 dnl later version. 19 dnl 20 dnl or both in parallel, as here. 21 dnl 22 dnl The GNU MP Library is distributed in the hope that it will be useful, but 23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 dnl for more details. 26 dnl 27 dnl You should have received copies of the GNU General Public License and the 28 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29 dnl see https://www.gnu.org/licenses/. 30 31 include(`../config.m4') 32 33 C cycles/limb best 34 C AMD K8,K9 35 C AMD K10 36 C AMD bd1 37 C AMD bd2 38 C AMD bobcat 39 C AMD jaguar 40 C Intel P4 41 C Intel PNR 42 C Intel NHM 43 C Intel SBR 44 C Intel IBR 45 C Intel HWL 46 C Intel BWL 47 C Intel atom 18.8 this 48 C VIA nano 49 50 C The loop of this code is the result of running a code generation and 51 C optimisation tool suite written by David Harvey and Torbjorn Granlund. 

C mpn_addmul_2 (rp, up, n, vp)
C
C Multiplies the n-limb operand at up by the two limbs vp[0] and vp[1] and
C accumulates the product into the limbs at rp.  Limbs rp[0] .. rp[n-1] are
C updated with read-modify-write adds, rp[n] is written with a plain store,
C and the most significant limb of the result is returned in %rax.
C
C NOTE(review): n = 1 looks unsupported, since the b01 entry always runs one
C full four-stage pass of the loop; presumably callers guarantee n >= 2.
C TODO confirm against the callers.

C Argument registers (STD64).  The trailing note on each line names the
C register that carries the same argument under DOS64.
define(`rp',      `%rdi')   C DOS64: rcx
define(`up',      `%rsi')   C DOS64: rdx
define(`n_param', `%rdx')   C DOS64: r8
define(`vp',      `%rcx')   C DOS64: r9

C Working registers.
C   v0, v1    the two multiplier limbs, loaded once from vp
C   w0 .. w3  accumulators for the running partial sums; their roles rotate
C             from one loop stage to the next
C   n         loop counter, copied from n_param because mul overwrites %rdx
C w1 shares %rcx with vp.  This is safe because both vp limbs are loaded into
C v0/v1 before any path writes w1.
define(`v0',      `%r8')
define(`v1',      `%r9')
define(`w0',      `%rbx')
define(`w1',      `%rcx')
define(`w2',      `%rbp')
define(`w3',      `%r10')
define(`n',       `%r11')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_addmul_2)
	FUNC_ENTRY(4)		C macro defined outside this file; presumably maps the 4 DOS64 args
	push	%rbx		C callee-saved, used as w0
	push	%rbp		C callee-saved, used as w2

	mov	(up), %rax	C rax = up[0]

	mov	(vp), v0	C v0 = vp[0]
	mov	8(vp), v1	C v1 = vp[1]; vp (%rcx) is dead from here on

	mov	n_param, n	C free %rdx before the first mul
	mul	v0		C rdx:rax = up[0] * v0

C Dispatch on the two low bits of n.  R8() and R32() are defined outside this
C file; presumably they name the 8-bit and 32-bit views of a register.
C Each entry path below
C   - parks the first product in the accumulator pair its entry label expects,
C   - reloads up[0] into rax for the mul v1 at that label,
C   - clears the next carry accumulator,
C   - biases up and rp so the fixed displacements used at the entry label
C     address the first limbs.
	test	$1, R8(n)
	jnz	L(bx1)

L(bx0):	test	$2, R8(n)
	jnz	L(b10)

C Both low bits of n clear: enter the loop at lo0.
L(b00):	mov	%rax, w0
	mov	(up), %rax
	mov	%rdx, w1
	xor	R32(w2), R32(w2)
	lea	-8(rp), rp
	jmp	L(lo0)

C Low bits of n are 10: enter the loop at lo2.
L(b10):	mov	%rax, w2
	mov	(up), %rax
	mov	%rdx, w3
	xor	R32(w0), R32(w0)
	lea	-16(up), up
	lea	-24(rp), rp
	jmp	L(lo2)

L(bx1):	test	$2, R8(n)
	jnz	L(b11)

C Low bits of n are 01: enter at the top of the loop.  This is the only path
C that adjusts the counter, making n a multiple of 4 before the full pass.
L(b01):	mov	%rax, w3
	mov	%rdx, w0
	mov	(up), %rax
	xor	R32(w1), R32(w1)
	lea	8(up), up
	dec	n
	jmp	L(lo1)

C Low bits of n are 11: enter the loop at lo3.
L(b11):	mov	%rax, w1
	mov	(up), %rax
	mov	%rdx, w2
	xor	R32(w3), R32(w3)
	lea	-8(up), up
	lea	-16(rp), rp
	jmp	L(lo3)

C Main loop, four stages per pass.  Every stage follows the same pattern with
C the accumulator roles rotated:
C   mul v1        product of the limb already in rax with v1
C   add w, k(rp)  add the completed partial sum into the destination
C   adc / adc     fold that product into the next two accumulators
C   mul v0        product of the next source limb with v0
C   add / adc     fold that product in as well
C   adc $0        catch the final carry in the freshly cleared accumulator
C Each source limb is loaded twice because mul overwrites rax: once ahead of
C its mul v0 and once more ahead of its mul v1 in the following stage.
C Accumulators are cleared with mov $0 rather than xor because the clear sits
C between an add and the adc that consumes its carry, and mov leaves the
C flags untouched.
	ALIGN(16)
L(top):
L(lo1):	mul	v1
	add	w3, (rp)
	mov	$0, R32(w2)	C flag-preserving clear, carry still pending
	adc	%rax, w0
	mov	(up), %rax
	adc	%rdx, w1
	mul	v0
	add	%rax, w0
	mov	(up), %rax	C same limb again, for the mul v1 at lo0
	adc	%rdx, w1
	adc	$0, R32(w2)
L(lo0):	mul	v1
	add	w0, 8(rp)
	adc	%rax, w1
	mov	8(up), %rax
	mov	$0, R32(w3)	C flag-preserving clear, carry still pending
	adc	%rdx, w2
	mul	v0
	add	%rax, w1
	mov	8(up), %rax	C same limb again, for the mul v1 at lo3
	adc	%rdx, w2
	adc	$0, R32(w3)
L(lo3):	mul	v1
	add	w1, 16(rp)
	adc	%rax, w2
	mov	16(up), %rax
	mov	$0, R32(w0)	C flag-preserving clear, carry still pending
	adc	%rdx, w3
	mul	v0
	add	%rax, w2
	mov	16(up), %rax	C same limb again, for the mul v1 at lo2
	adc	%rdx, w3
	adc	$0, R32(w0)
L(lo2):	mul	v1
	add	w2, 24(rp)
	adc	%rax, w3
	mov	24(up), %rax
	adc	%rdx, w0
	mov	$0, R32(w1)
	lea	32(rp), rp	C advance rp by four limbs; lea leaves flags alone
	mul	v0
	lea	32(up), up	C advance up by four limbs
	add	%rax, w3
	adc	%rdx, w0
	mov	-8(up), %rax	C the limb loaded above as 24(up), now behind the bumped pointer
	adc	$0, R32(w1)
	sub	$4, n
	ja	L(top)		C loop while n is still above zero; exits on zero or on borrow

C Wind-down: the last source limb is in rax and only its v1 product remains.
L(end):	mul	v1
	add	w3, (rp)	C last read-modify-write limb
	adc	%rax, w0
	adc	%rdx, w1
	mov	w0, 8(rp)	C rp[n] is stored, not accumulated
	mov	w1, %rax	C return the most significant limb
	pop	%rbp
	pop	%rbx
	FUNC_EXIT()		C macro defined outside this file; presumably undoes FUNC_ENTRY
	ret
EPILOGUE()