dnl  Source listing: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/mul_2.asm
dnl
dnl  AMD64 mpn_mul_2 -- Multiply an n-limb vector with a 2-limb vector and
dnl  store the result in a third limb vector.

dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 2.275
C AMD K10	 2.275
C Intel P4	13.5
C Intel core2	 4.0
C Intel corei	 3.8
C Intel atom	 ?
C VIA nano	 ?

C This code is the result of running a code generation and optimization tool
C suite written by David Harvey and Torbjorn Granlund.

C TODO
C  * Work on feed-in and wind-down code.
C  * Convert "mov $0" to "xor".
C  * Adjust initial lea to save some bytes.
C  * Perhaps adjust n from n_param&3 value?
C  * Replace with 2.25 c/l sequence.
C INPUT PARAMETERS
C   rp       destination vector; limbs rp[0] .. rp[n] are stored
C   up       source vector of n limbs
C   n_param  limb count n (lives in %rdx, so it is lost at the first mul and
C            is not read again after the feed-in dispatch)
C   vp       source vector of 2 limbs
define(`rp',	 `%rdi')
define(`up',	 `%rsi')
define(`n_param',`%rdx')
define(`vp',	 `%rcx')

C WORKING REGISTERS
C   v0, v1   the two multiplier limbs, vp[0] and vp[1]
C   w0..w3   rotating window over four consecutive result limbs; the register
C            that has just been stored is zeroed again and reused as the
C            carry catcher at the top of the window
C   n        negative limb index, stepped by 4 per loop pass towards zero
C Note that w1 shares %rcx with vp.  This is safe because both limbs of vp
C are loaded into v0/v1 before w1 is written for the first time.
define(`v0', `%r8')
define(`v1', `%r9')
define(`w0', `%rbx')
define(`w1', `%rcx')
define(`w2', `%rbp')
define(`w3', `%r10')
define(`n',  `%r11')

C ABI_SUPPORT, FUNC_ENTRY and FUNC_EXIT come from the included m4 setup and
C are not visible in this file (presumably they adapt the DOS64 argument
C registers to the ones named above -- confirm against the x86_64 m4 macros).
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C ---------------------------------------------------------------------------
C mpn_mul_2 (rp, up, n, vp)
C
C Computes the product of the n-limb vector at up and the 2-limb vector at
C vp.  The low n+1 limbs of the product are stored at rp[0] .. rp[n] and the
C most significant limb is returned in %rax.
C
C Structure: a feed-in stub selected by n mod 4 primes the pipeline and jumps
C into the matching point of a 4-limbs-per-pass loop; a wind-down sequence
C after the loop finishes the last source limb.
C
C Registers written: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11 and flags.
C %rbx and %rbp (w0, w2) are pushed on entry and popped before returning.
C
C NOTE(review): the n mod 4 == 1 stub always runs one complete loop pass,
C so n = 1 would read limbs beyond up[0].  Callers presumably guarantee
C n >= 2 -- confirm against the callers and the reference implementation.
C ---------------------------------------------------------------------------
ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_mul_2)
	FUNC_ENTRY(4)
	push	%rbx			C w0 lives in %rbx
	push	%rbp			C w2 lives in %rbp

	mov	(vp), v0
	mov	8(vp), v1

	mov	(up), %rax		C u[0], first multiplicand limb

	C Point up and rp at their limb n-1 and index them with a negative n,
	C so the loop counter doubles as the address offset.
	mov	n_param, n
	neg	n
	lea	-8(up,n_param,8), up
	lea	-8(rp,n_param,8), rp

	C Dispatch on n mod 4.  The cmp against 2 separates 1 (carry set),
	C 2 (zero set) and 3 (fall through).
	and	$3, R32(n_param)
	jz	L(m2p0)
	cmp	$2, R32(n_param)
	jc	L(m2p1)
	jz	L(m2p2)

C n mod 4 == 3: form u[0]*v0 and u[0]*v1 here, then join the loop at m23.
C The first (partial) pass stores two result limbs; n is biased by -1 so
C that 16(rp,n,8) addresses rp[0].
L(m2p3):
	mul	v0
	xor	R32(w3), R32(w3)
	mov	%rax, w1
	mov	%rdx, w2
	mov	8(up,n,8), %rax
	add	$-1, n
	mul	v1
	add	%rax, w2		C carry is consumed by the adc at m23
	jmp	L(m23)

C n mod 4 == 0: form u[0]*v0 and join the loop at m20.  The first (partial)
C pass stores three result limbs; with n unbiased 8(rp,n,8) addresses rp[0].
L(m2p0):
	mul	v0
	xor	R32(w2), R32(w2)
	mov	%rax, w0
	mov	%rdx, w1
	jmp	L(m20)

C n mod 4 == 1: form u[0]*v0 and enter at the loop top with an empty window.
C n is biased by +1 so that 0(rp,n,8) addresses rp[0].
L(m2p1):
	mul	v0
	xor	R32(w3), R32(w3)
	xor	R32(w0), R32(w0)
	xor	R32(w1), R32(w1)
	add	$1, n
	jmp	L(m2top)

C n mod 4 == 2: form u[0]*v0, reload u[0] for the v1 product and join the
C loop at m22.  The first (partial) pass stores one result limb; n is biased
C by -2 so that 24(rp,n,8) addresses rp[0].
L(m2p2):
	mul	v0
	xor	R32(w0), R32(w0)
	xor	R32(w1), R32(w1)
	mov	%rax, w2
	mov	%rdx, w3
	mov	8(up,n,8), %rax
	add	$-2, n
	jmp	L(m22)


C Main loop.  Each pass retires four source limbs: every limb is multiplied
C by v0 and by v1, and one finished result limb is stored per quarter.
C On entry to each quarter %rdx:%rax holds the pending v0 product (m2top,
C m22 excepted, which enters just before the v1 multiply).
	ALIGN(32)
L(m2top):
	add	%rax, w3
	adc	%rdx, w0
	mov	0(up,n,8), %rax
	adc	$0, R32(w1)
	mov	$0, R32(w2)
	mul	v1
	add	%rax, w0
	mov	w3, 0(rp,n,8)		C w3 is complete
	adc	%rdx, w1
	mov	8(up,n,8), %rax
	mul	v0
	add	%rax, w0
	adc	%rdx, w1
	adc	$0, R32(w2)
L(m20):	mov	8(up,n,8), %rax
	mul	v1
	add	%rax, w1
	adc	%rdx, w2
	mov	16(up,n,8), %rax
	mov	$0, R32(w3)		C mov keeps the carry for the adc below
	mul	v0
	add	%rax, w1
	mov	16(up,n,8), %rax
	adc	%rdx, w2
	adc	$0, R32(w3)
	mul	v1
	add	%rax, w2
	mov	w0, 8(rp,n,8)		C w0 is complete
L(m23):	adc	%rdx, w3
	mov	24(up,n,8), %rax
	mul	v0
	mov	$0, R32(w0)
	add	%rax, w2
	adc	%rdx, w3
	mov	w1, 16(rp,n,8)		C w1 is complete
	mov	24(up,n,8), %rax
	mov	$0, R32(w1)		C must not touch flags: carry of the adc
	adc	$0, R32(w0)		C into w3 is collected here
L(m22):	mul	v1
	add	%rax, w3
	mov	w2, 24(rp,n,8)		C w2 is complete
	adc	%rdx, w0
	mov	32(up,n,8), %rax
	mul	v0			C v0 product of the next limb, consumed at
	add	$4, n			C m2top or by the wind-down code
	js	L(m2top)		C loop while n is still negative


C Wind-down.  %rdx:%rax holds u[n-1]*v0; up and rp point at limb n-1.
	add	%rax, w3
	adc	%rdx, w0
	adc	$0, R32(w1)
	mov	(up), %rax		C u[n-1] again, for the v1 product
	mul	v1
	mov	w3, (rp)		C rp[n-1]
	add	%rax, w0
	adc	%rdx, w1
	mov	w0, 8(rp)		C rp[n]
	mov	w1, %rax		C return the most significant limb

	pop	%rbp
	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()