dnl  Origin: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/mul_1.asm

dnl  AMD64 mpn_mul_1.

dnl  Copyright 2003-2005, 2007, 2008, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C              cycles/limb
C AMD K8,K9      2.5
C AMD K10        2.5
C AMD bd1        5.0
C AMD bobcat     5.5
C Intel P4      12.3
C Intel core2    4.0
C Intel NHM      3.75
C Intel SBR      2.95
C Intel atom    19.8
C VIA nano       4.25

C The loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.

C TODO
C  * The loop is great, but the prologue and epilogue code was quickly written.
C    Tune it!

C ---------------------------------------------------------------------------
C mpn_mul_1  (rp, up, n, vl)
C mpn_mul_1c (rp, up, n, vl, carry_in)
C
C Multiply the n limbs at up by the single limb vl, store the n low result
C limbs at rp and return the final high limb in rax.  mpn_mul_1c additionally
C adds carry_in into the lowest product; mpn_mul_1 uses a carry-in of zero.
C Both entry points share all code from L(common) onwards.
C
C Syntax: AT&T operand order (source, destination), run through m4.
C
C Register use (STD64 names; the trailing comments on the defines below give
C the DOS64 argument registers):
C   rp       destination pointer
C   up       source pointer
C   n        limb count, later negated and used as an upward-counting index
C   vl       multiplier limb
C   rax,rdx  low and high halves of each product (written by mul)
C   rbx, r8, r9, r10   rotating accumulators for pending result limbs
C
C rbx is pushed on entry and popped before returning.  Under DOS64, rsi and
C rdi are pushed and popped as well.
C
C NOTE(review): the first source limb is read before n is examined, so n is
C presumably required to be at least 1 -- confirm against the callers.
C ---------------------------------------------------------------------------

define(`rp',      `%rdi')   C rcx
define(`up',      `%rsi')   C rdx
define(`n_param', `%rdx')   C r8
define(`vl',      `%rcx')   C r9

define(`n',       `%r11')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C DOS64 overrides.  rp, vl and n are bound to the registers in which DOS64
C passes them; up stays in rsi and is loaded from rdx in the prologues.
C NOTE(review): the r9 -> rdi and r8 -> r11 definitions appear intended to
C rename the scratch registers written as %r9 and %r8 in the body, because
C under DOS64 those two registers hold vl and n -- confirm against the m4
C quoting rules and the IFDOS definition, which are outside this file.
IFDOS(` define(`up', ``%rsi'')  ') dnl
IFDOS(` define(`rp', ``%rcx'')  ') dnl
IFDOS(` define(`vl', ``%r9'')   ') dnl
IFDOS(` define(`r9', ``rdi'')   ') dnl
IFDOS(` define(`n',  ``%r8'')   ') dnl
IFDOS(` define(`r8', ``r11'')   ') dnl

ASM_START()
        TEXT
        ALIGN(16)

C Entry with explicit carry-in: fetch the fifth argument into r10, then join
C the common path.
PROLOGUE(mpn_mul_1c)
IFDOS(``push    %rsi            '')
IFDOS(``push    %rdi            '')
IFDOS(``mov     %rdx, %rsi      '')
        push    %rbx
IFSTD(` mov     %r8, %r10')
IFDOS(` mov     64(%rsp), %r10')        C 40 + 3*8  (3 push insns)
        jmp     L(common)
EPILOGUE()

C Entry without carry-in: r10 is cleared so the common path adds zero.
PROLOGUE(mpn_mul_1)
IFDOS(``push    %rsi            '')
IFDOS(``push    %rdi            '')
IFDOS(``mov     %rdx, %rsi      '')

        push    %rbx
        xor     %r10, %r10
L(common):
        mov     (up), %rax              C read first u limb early
IFSTD(` mov     n_param, %rbx   ')      C move away n from rdx, mul uses it
IFDOS(` mov     n, %rbx         ')
        mul     vl
IFSTD(` mov     %rbx, n         ')

        add     %r10, %rax              C fold the carry-in into the first product
        adc     $0, %rdx

C Dispatch on n mod 4.  Each case biases up and rp, negates n so that it
C counts upwards towards zero, and enters the 4-way unrolled loop at the
C matching point: 1 -> L(L1), 0 -> L(L0), 3 -> L(L3), 2 -> L(L2).
        and     $3, R32(%rbx)
        jz      L(b0)
        cmp     $2, R32(%rbx)
        jz      L(b2)
        jg      L(b3)

L(b1):  dec     n
        jne     L(gt1)
        mov     %rax, (rp)              C n was 1: store the only limb, rdx is the result
        jmp     L(ret)
L(gt1): lea     8(up,n,8), up
        lea     -8(rp,n,8), rp
        neg     n
        xor     %r10, %r10
        xor     R32(%rbx), R32(%rbx)
        mov     %rax, %r9
        mov     (up,n,8), %rax          C load the next u limb for the mul at L(L1)
        mov     %rdx, %r8
        jmp     L(L1)

L(b0):  lea     (up,n,8), up
        lea     -16(rp,n,8), rp
        neg     n
        xor     %r10, %r10
        mov     %rax, %r8
        mov     %rdx, %rbx
        jmp     L(L0)

L(b3):  lea     -8(up,n,8), up
        lea     -24(rp,n,8), rp
        neg     n
        mov     %rax, %rbx
        mov     %rdx, %r10
        jmp     L(L3)

L(b2):  lea     -16(up,n,8), up
        lea     -32(rp,n,8), rp
        neg     n
        xor     %r8, %r8
        xor     R32(%rbx), R32(%rbx)
        mov     %rax, %r10
        mov     24(up,n,8), %rax        C load the next u limb for the mul at L(L2)
        mov     %rdx, %r9
        jmp     L(L2)

C Main loop, four limbs per pass.  r10, r9, r8 and rbx take turns holding a
C pending result limb: the low half of a product is added into one of them
C and the high half plus carry into the next, which is zero beforehand.
C The loads and stores placed between an add and its adc are plain mov
C instructions, which leave the carry flag untouched.
        ALIGN(16)
L(top): mov     %r10, (rp,n,8)
        add     %rax, %r9
        mov     (up,n,8), %rax
        adc     %rdx, %r8
        mov     $0, R32(%r10)
L(L1):  mul     vl
        mov     %r9, 8(rp,n,8)
        add     %rax, %r8
        adc     %rdx, %rbx
L(L0):  mov     8(up,n,8), %rax
        mul     vl
        mov     %r8, 16(rp,n,8)
        add     %rax, %rbx
        adc     %rdx, %r10
L(L3):  mov     16(up,n,8), %rax
        mul     vl
        mov     %rbx, 24(rp,n,8)
        mov     $0, R32(%r8)            C zero
        mov     %r8, %rbx               C zero
        add     %rax, %r10
        mov     24(up,n,8), %rax
        mov     %r8, %r9                C zero
        adc     %rdx, %r9
L(L2):  mul     vl
        add     $4, n
        js      L(top)                  C keep looping while the index is negative

C Wind down: store the last two result limbs; the final high limb is left in
C rdx.  r8 is zero on every path that reaches this point, so the adc adds
C only the carry and the closing add leaves rdx unchanged.
        mov     %r10, (rp,n,8)
        add     %rax, %r9
        adc     %r8, %rdx
        mov     %r9, 8(rp,n,8)
        add     %r8, %rdx
L(ret): mov     %rdx, %rax              C return the most significant limb

        pop     %rbx
IFDOS(``pop     %rdi            '')
IFDOS(``pop     %rsi            '')
        ret
EPILOGUE()