dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/atom/aorsmul_1.asm (about)

dnl  AMD64 mpn_addmul_1/mpn_submul_1 optimised for Intel Atom.

dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb	best
C AMD K8,K9
C AMD K10
C AMD bd1
C AMD bd2
C AMD bobcat
C AMD jaguar
C Intel P4
C Intel PNR
C Intel NHM
C Intel SBR
C Intel IBR
C Intel HWL
C Intel BWL
C Intel atom	19.37		this
C VIA nano

C The loop of this code is the result of running a code generation and
C optimisation tool suite written by David Harvey and Torbjorn Granlund.

C mpn_addmul_1 / mpn_submul_1 (which one is built is selected by
C OPERATION_addmul_1 or OPERATION_submul_1 below):
C
C   {rp, n_param}  =  {rp, n_param}  ADDSUB  {up, n_param} * v0
C
C Each of the n_param limbs at up is multiplied by v0 and the products are
C added to (addmul) or subtracted from (submul) the n_param limbs at rp, in
C place.  The value returned in rax is the high limb of the last product plus
C the carry (or borrow) out of the most significant rp limb.
C
C NOTE(review): (up) is loaded unconditionally and there is no path for
C n_param = 0, so callers are assumed to pass n_param >= 1 -- confirm against
C the mpn interface documentation.
C
C Register roles.  The trailing annotation on each argument define is carried
C over unchanged; presumably it names the register the argument arrives in
C under the DOS64 ABI before FUNC_ENTRY(4) runs -- FUNC_ENTRY is defined
C outside this file, confirm there.

define(`rp',      `%rdi')   C rcx
define(`up',      `%rsi')   C rdx
define(`n_param', `%rdx')   C r8
define(`v0',      `%rcx')   C r9

C Loop index.  rbx is pushed on entry and popped before ret.
define(`n',       `%rbx')

C ADDSUB is the only instruction that differs between the two entry points.
ifdef(`OPERATION_addmul_1',`
  define(`ADDSUB',	`add')
  define(`func',	`mpn_addmul_1')
')
ifdef(`OPERATION_submul_1',`
  define(`ADDSUB',	`sub')
  define(`func',	`mpn_submul_1')
')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	FUNC_ENTRY(4)
	push	%rbx

	mov	(up), %rax		C rax = least significant source limb
	lea	-8(up,n_param,8), up	C up -> most significant source limb
	lea	-16(rp,n_param,8), rp	C rp -> one limb below the most significant rp limb

C Dispatch on n_param mod 4.  Every feed-in path computes
C n = (small constant) - n_param BEFORE its first mul, because mul overwrites
C rdx, which is n_param.  From then on n is the running index: it is stepped
C by 4 per loop iteration and the loop continues while it is negative.

	test	$1, R8(n_param)
	jnz	L(bx1)

L(bx0):	test	$2, R8(n_param)
	jnz	L(b10)

C n_param = 0 mod 4: two products started, enter the loop at lo0.
L(b00):	mov	$1, R32(n)
	sub	n_param, n
	mul	v0
	mov	%rax, %r11
	mov	8(up,n,8), %rax
	mov	%rdx, %r10
	mul	v0
	mov	%rax, %r8
	mov	16(up,n,8), %rax
	jmp	L(lo0)

C n_param = 2 mod 4: n = 3 - n_param is non-negative only for n_param = 2, in
C which case both products are already computed and only the wind-down from
C cj2 remains.
L(b10):	mov	$3, R32(n)
	sub	n_param, n
	mul	v0
	mov	%rax, %r11
	mov	-8(up,n,8), %rax
	mov	%rdx, %r10
	mul	v0
	test	n, n
	jns	L(cj2)
	mov	%rax, %r8
	mov	(up,n,8), %rax
	mov	%rdx, %r9
	jmp	L(lo2)

L(bx1):	test	$2, R8(n_param)
	jnz	L(b11)

C n_param = 1 mod 4: n = 2 - n_param is non-negative only for n_param = 1, in
C which case the single product goes straight to the final step at cj1.
L(b01):	mov	$2, R32(n)
	sub	n_param, n
	mul	v0
	test	n, n
	jns	L(cj1)
	mov	%rax, %r8
	mov	(up,n,8), %rax
	mov	%rdx, %r9
	mul	v0
	mov	%rax, %r11
	mov	8(up,n,8), %rax
	mov	%rdx, %r10
	jmp	L(lo1)

C n_param = 3 mod 4: two products started, enter the loop at its tail (lo3).
L(b11):	xor	R32(n), R32(n)
	sub	n_param, n
	mul	v0
	mov	%rax, %r8
	mov	16(up,n,8), %rax
	mov	%rdx, %r9
	mul	v0
	mov	%rax, %r11
	mov	24(up,n,8), %rax
	jmp	L(lo3)

C Main loop, four limbs per iteration.  The register pairs r8/r9 and r11/r10
C alternately hold the low/high limbs of the two products in flight.  In each
C stage mul starts the next product, ADDSUB applies a finished low limb to rp,
C and the following adc pair folds the previous high limb plus the carry flag
C left by ADDSUB into the next low limb.  The mov instructions placed between
C an ADDSUB and its adc do not modify flags, so that carry survives.
C Do not reorder: the instruction order is the scheduling referred to in the
C file header.

	ALIGN(16)
L(top):	mul	v0
	ADDSUB	%r8, -16(rp,n,8)
	mov	%rax, %r8
	mov	(up,n,8), %rax
	adc	%r9, %r11
	mov	%rdx, %r9
	adc	$0, %r10
L(lo2):	mul	v0
	ADDSUB	%r11, -8(rp,n,8)
	mov	%rax, %r11
	mov	8(up,n,8), %rax
	adc	%r10, %r8
	mov	%rdx, %r10
	adc	$0, %r9
L(lo1):	mul	v0
	ADDSUB	%r8, (rp,n,8)
	mov	%rax, %r8
	adc	%r9, %r11
	mov	16(up,n,8), %rax
	adc	$0, %r10
L(lo0):	mov	%rdx, %r9
	mul	v0
	ADDSUB	%r11, 8(rp,n,8)
	mov	%rax, %r11
	adc	%r10, %r8
	mov	24(up,n,8), %rax
	adc	$0, %r9
L(lo3):	add	$4, n
	mov	%rdx, %r10		C mov leaves the sign flag from the add for js
	js	L(top)

C Wind-down: last product, then drain the two pending low limbs.
L(end):	mul	v0
	ADDSUB	%r8, -16(rp,n,8)
	adc	%r9, %r11
	adc	$0, %r10
L(cj2):	ADDSUB	%r11, -8(rp,n,8)
	adc	%r10, %rax
	adc	$0, %rdx
L(cj1):	ADDSUB	%rax, (rp,n,8)		C most significant rp limb
	mov	$0, R32(%rax)		C mov, not xor: the carry flag from ADDSUB must reach the adc
	adc	%rdx, %rax		C return value = high limb + carry/borrow
	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()
ASM_END()