dnl  AMD64 mpn_mul_1 optimised for AMD Bulldozer.

dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9
C AMD K10
C AMD bd1	 4
C AMD bobcat
C Intel P4
C Intel core2
C Intel NHM
C Intel SBR
C Intel atom
C VIA nano

C The loop of this code is the result of running a code generation and
C optimisation tool suite written by David Harvey and Torbjorn Granlund.

C TODO
C  * Move loop code into feed-in blocks, to save insn for zeroing regs.

C Overview
C  Two entry points share one body.  Both multiply the limbs read from up
C  by the single limb v0, store the low limbs of the running result through
C  rp, and return the final high limb in rax.  mpn_mul_1c additionally adds
C  a carry-in limb to the first product before joining the shared code.
C
C Register roles (the trailing comment on each define is the DOS64
C incoming argument register):
C  rp       destination pointer
C  up       source pointer
C  n_param  limb count as passed in; copied out of rdx because mul
C           overwrites rdx
C  v0       multiplier limb
C  n        working limb count, later a negative index into up and rp

define(`rp',      `%rdi')   C rcx
define(`up',      `%rsi')   C rdx
define(`n_param', `%rdx')   C r8
define(`v0',      `%rcx')   C r9

define(`n',       `%rbx')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C DOS64 register remapping.
C NOTE(review): the r8 and r9 defines appear to rename the literal r8/r9
C registers used in the loop to rbx/rdi so that they do not collide with
C n and v0 under this ABI; confirm against the m4 expansion.
IFDOS(`	define(`up', ``%rsi'')	') dnl
IFDOS(`	define(`rp', ``%rcx'')	') dnl
IFDOS(`	define(`v0', ``%r9'')	') dnl
IFDOS(`	define(`r9', ``rdi'')	') dnl
IFDOS(`	define(`n',  ``%r8'')	') dnl
IFDOS(`	define(`r8', ``rbx'')	') dnl

ASM_START()
	TEXT
	ALIGN(16)

C mpn_mul_1c: same as mpn_mul_1 but with a carry-in limb.
C  STD64: carry-in is taken from the fifth integer argument register.
C  DOS64: carry-in is read from the stack.
C  After adding the carry-in to the first product this entry point jumps to
C  L(common), which lives inside mpn_mul_1 below.
PROLOGUE(mpn_mul_1c)
IFDOS(``push	%rsi		'')
IFDOS(``push	%rdi		'')
IFDOS(``mov	%rdx, %rsi	'')

	mov	(up), %rax		C read first u limb early
	push	%rbx
IFSTD(`	mov	n_param, %r11	')	C move away n from rdx, mul uses it
IFDOS(`	mov	n, %r11		')
	mul	v0

IFSTD(` add	%r8, %rax	')
IFDOS(` add	64(%rsp), %rax	')	C 40 + 3*8  (3 push insns)
	adc	$0, %rdx		C propagate carry into the high limb
	jmp	L(common)

EPILOGUE()

	ALIGN(16)

C mpn_mul_1: multiply the limbs at up by v0, store the result limbs at rp,
C return the most significant limb in rax.
C  On entry to L(common): rdx:rax holds the first product (plus carry-in
C  when coming from mpn_mul_1c) and r11 holds the limb count.
PROLOGUE(mpn_mul_1)
IFDOS(``push	%rsi		'')
IFDOS(``push	%rdi		'')
IFDOS(``mov	%rdx, %rsi	'')

	mov	(up), %rax		C read first u limb early
	push	%rbx
IFSTD(`	mov	n_param, %r11	')	C move away n from rdx, mul uses it
IFDOS(`	mov	n, %r11		')
	mul	v0

L(common):
IFSTD(`	mov	%r11, n		')

C Dispatch on the limb count mod 4 to the matching feed-in block.  The lea
C between the and and the jz leaves the flags untouched.
	and	$3, R32(%r11)
	lea	-16(rp,n,8), rp		C bias rp to 16 bytes before its end
	jz	L(b0)
	cmp	$2, R32(%r11)
	jb	L(b1)
	jz	L(b2)

C Feed-in, count mod 4 = 3.  not n yields -n-1 as the starting index.
L(b3):	mov	%rax, %r10
	mov	%rdx, %r11
	mov	8(up), %rax
	mul	v0
	lea	(up,n,8), up		C point up past the last source limb
	not	n
	jmp	L(L3)

C Feed-in, count mod 4 = 0.
L(b0):	mov	%rax, %r9
	mov	%rdx, %r10
	mov	8(up), %rax
	lea	(up,n,8), up		C point up past the last source limb
	neg	n
	jmp	L(L0)

C Feed-in, count mod 4 = 1.  A count of exactly 1 skips the loop: the first
C product is already complete, so only the store and return remain.
L(b1):	mov	%rax, %r8
	cmp	$1, n
	jz	L(n1)
	mov	%rdx, %r9
	lea	(up,n,8), up		C point up past the last source limb
	neg	n
	mov	%r8, 16(rp,n,8)		C store the first result limb
	inc	n
	jmp	L(L1)

C Feed-in, count mod 4 = 2.  Falls through into the loop unless the index
C is already non-negative, i.e. the count was exactly 2.
L(b2):	mov	%rax, %r11
	mov	%rdx, %r8
	mov	8(up), %rax
	lea	(up,n,8), up		C point up past the last source limb
	neg	n
	add	$2, n
	jns	L(end)

C Main loop, four limbs per iteration.  n is a negative limb index counting
C up towards zero; up and rp were biased above so that (up,n,8) and
C (rp,n,8) address the current position.  Each product low half is added to
C the previous high half and the carry is folded into the new high half.
	ALIGN(16)
L(top):	mul	v0
	mov	%rdx, %r9
	add	%rax, %r8
	adc	$0, %r9
	mov	%r8, 8(rp,n,8)
	mov	%r11, (rp,n,8)
L(L1):	mov	(up,n,8), %rax
	mul	v0
	add	%rax, %r9
	mov	%rdx, %r10
	mov	8(up,n,8), %rax
	adc	$0, %r10
L(L0):	mul	v0
	add	%rax, %r10
	mov	%rdx, %r11
	mov	16(up,n,8), %rax
	adc	$0, %r11
	mul	v0
	mov	%r9, 16(rp,n,8)
L(L3):	add	%rax, %r11
	mov	%r10, 24(rp,n,8)
	mov	%rdx, %r8
	adc	$0, %r8
	add	$4, n
	mov	-8(up,n,8), %rax	C mov keeps the flags from add for js
	js	L(top)

C Wind-down: final product, last two stores, high limb returned in rax.
L(end):	mul	v0
	add	%rax, %r8
	adc	$0, %rdx
	mov	%r11, (rp)
L(n1):	mov	%r8, 8(rp)
	mov	%rdx, %rax		C return value: most significant limb

	pop	%rbx
IFDOS(``pop	%rdi		'')
IFDOS(``pop	%rsi		'')
	ret
EPILOGUE()
ASM_END()