dnl  Listing path: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/lshift.asm

dnl  AMD64 mpn_lshift -- mpn left shift.

dnl  Copyright 2003, 2005, 2007, 2009, 2011, 2012 Free Software Foundation,
dnl  Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C mpn_lshift: shift the n-limb number at up left by cnt bits, store the n
C result limbs at rp, and return in %rax the bits shifted out of the most
C significant limb.  Limbs are 64 bits wide; limb 0 (lowest address) is the
C least significant.
C
C Two code paths:
C  * cnt = 1, when the operands allow low-to-high processing: every limb is
C    doubled with adc, so the carry flag hands the shifted-out bit to the
C    next limb.  Four limbs per loop iteration.
C  * everything else: L(gen), which works from the most significant limb
C    downwards, four limbs per loop iteration, OR-ing the shl of one limb
C    with the shr of the limb below it.
C
C NOTE(review): the code appears to rely on n >= 1 and 1 <= cnt <= 63 (at
C least one limb is always processed, and cnt = 0 would make the right shift
C count 0 as well) -- confirm against the mpn_lshift contract in the GMP
C manual.


C	     cycles/limb   cycles/limb cnt=1
C AMD K8,K9	 2.375		 1.375
C AMD K10	 2.375		 1.375
C Intel P4	 8		10.5
C Intel core2	 2.11		 4.28
C Intel corei	 ?		 ?
C Intel atom	 5.75		 3.5
C VIA nano	 3.5		 2.25


C INPUT PARAMETERS
C rp  = destination limb pointer
C up  = source limb pointer
C n   = number of limbs
C cnt = shift count; the code below spells it %rcx / R8(%rcx) directly.
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

C NOTE(review): ABI_SUPPORT, FUNC_ENTRY and FUNC_EXIT are defined outside this
C file; presumably they let the same body serve the DOS64 ABI by adapting its
C argument registers -- confirm in the included m4 definitions.
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_lshift)
	FUNC_ENTRY(4)
	cmp	$1, R8(%rcx)		C only cnt = 1 may use the adc path
	jne	L(gen)

C For cnt=1 we want to work from lowest limb towards higher limbs.
C Check for bad overlap: rp=up is OK, but rp=up+1..up+n-1 is bad, because a
C low-to-high pass would overwrite source limbs before they are read.  (The
C test below computes rp-up, so it is rp lying above up that is rejected.)
C When rp is below up the subtraction wraps to a huge unsigned value, which
C is not below n, so the forward path is taken.
C FIXME: this could surely be done more cleverly.

	mov	rp, %rax
	sub	up, %rax		C rax = rp - up, in bytes
	je	L(fwd)			C rp = up
	shr	$3, %rax		C byte distance -> limb distance (unsigned)
	cmp	n, %rax
	jb	L(gen)			C distance < n: fall back to high-to-low code

L(fwd):	mov	R32(n), R32(%rax)	C eax = n, tail count is derived from it
	shr	$2, n			C n = number of 4-limb groups
	je	L(e1)			C fewer than 4 limbs: tail code only
	and	$3, R32(%rax)		C eax = n mod 4; also leaves CF = 0 for first adc

C NOTE(review): the two nops after ALIGN(8) presumably place L(t1) at a chosen
C offset; the rationale is not recorded in this file.
	ALIGN(8)
	nop
	nop
C Main cnt=1 loop.  adc x,x computes 2*x + CF, i.e. a 1-bit left shift that
C takes in the bit shifted out of the previous limb and leaves its own top bit
C in CF.  lea and dec do not modify CF, so the chain survives the pointer
C updates and the loop counter decrement.
L(t1):	mov	(up), %r8
	mov	8(up), %r9
	mov	16(up), %r10
	mov	24(up), %r11
	lea	32(up), up
	adc	%r8, %r8
	mov	%r8, (rp)
	adc	%r9, %r9
	mov	%r9, 8(rp)
	adc	%r10, %r10
	mov	%r10, 16(rp)
	adc	%r11, %r11
	mov	%r11, 24(rp)
	lea	32(rp), rp
	dec	n
	jne	L(t1)

C inc followed by dec leaves eax unchanged and sets ZF from it, while keeping
C CF (the carry out of the last adc) intact.
	inc	R32(%rax)
	dec	R32(%rax)
	jne	L(n00)			C 1..3 limbs left over
	adc	R32(%rax), R32(%rax)	C eax = 0 here, so eax = CF = bit shifted out
	FUNC_EXIT()
	ret
L(e1):	test	R32(%rax), R32(%rax)	C clear cy
C Tail: eax = number of limbs still to do, CF = carry into the next limb.
L(n00):	mov	(up), %r8
	dec	R32(%rax)
	jne	L(n01)
C	one limb left
	adc	%r8, %r8
	mov	%r8, (rp)
C L(ret) is not referenced anywhere else in this file.
L(ret):	adc	R32(%rax), R32(%rax)	C eax = 0 here, so eax = CF
	FUNC_EXIT()
	ret
L(n01):	dec	R32(%rax)
	mov	8(up), %r9		C mov leaves the flags from dec untouched
	jne	L(n10)
C	two limbs left
	adc	%r8, %r8
	adc	%r9, %r9
	mov	%r8, (rp)
	mov	%r9, 8(rp)
	adc	R32(%rax), R32(%rax)	C eax = 0 here, so eax = CF
	FUNC_EXIT()
	ret
C	three limbs left
L(n10):	mov	16(up), %r10
	adc	%r8, %r8
	adc	%r9, %r9
	adc	%r10, %r10
	mov	%r8, (rp)
	mov	%r9, 8(rp)
	mov	%r10, 16(rp)
	adc	$-1, R32(%rax)		C eax = 1 when three limbs remained: 1 - 1 + CF = CF
	FUNC_EXIT()
	ret

C Generic path, high limb to low limb.  cl is flipped between cnt (lsh count)
C and -cnt (rsh count) with neg; 64-bit shifts use only the low 6 bits of cl,
C so -cnt acts as 64-cnt.
L(gen):	neg	R32(%rcx)		C put rsh count in cl
	mov	-8(up,n,8), %rax	C most significant source limb
	shr	R8(%rcx), %rax		C function return value

	neg	R32(%rcx)		C put lsh count in cl
C Dispatch on n mod 4.  The pipelined loop below is entered with n = 3 mod 4,
C so the other residues first peel one, two or three limbs off the top.
	lea	1(n), R32(%r8)
	and	$3, R32(%r8)
	je	L(rlx)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	peel the top limb: rp[n-1] = up[n-1] << cnt | up[n-2] >> (64-cnt)
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	mov	%r10, -8(rp,n,8)
	dec	n
	jmp	L(rll)

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	peel the top limb, then continue as the n = 1 mod 4 case
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	mov	%r10, -8(rp,n,8)
	dec	n
	neg	R32(%rcx)		C put lsh count in cl
L(1x):
	cmp	$1, n
	je	L(ast)			C only the lowest limb remains
C	peel the top two limbs
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	mov	-24(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, -8(rp,n,8)
	mov	%r11, -16(rp,n,8)
	sub	$2, n

L(rll):	neg	R32(%rcx)		C put lsh count in cl
C Prime the pipeline: r10/r11 hold the left-shifted halves of the two highest
C remaining limbs; their right-shifted partners are OR-ed in later.
L(rlx):	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11

	sub	$4, n			C				      4
	jb	L(end)			C				      2
	ALIGN(16)
C Four limbs per iteration.  n has already been reduced by 4 here, hence the
C positive displacements for the limbs being completed.
L(top):
	C finish stuff from lsh block
	neg	R32(%rcx)		C put rsh count in cl
	mov	16(up,n,8), %r8
	mov	8(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, 24(rp,n,8)
	mov	%r11, 16(rp,n,8)
	C start two new rsh
	mov	0(up,n,8), %r8
	mov	-8(up,n,8), %r9
	shr	R8(%rcx), %r8
	shr	R8(%rcx), %r9

	C finish stuff from rsh block
	neg	R32(%rcx)		C put lsh count in cl
	mov	8(up,n,8), %r10
	mov	0(up,n,8), %r11
	shl	R8(%rcx), %r10
	or	%r10, %r8
	shl	R8(%rcx), %r11
	or	%r11, %r9
	mov	%r8, 8(rp,n,8)
	mov	%r9, 0(rp,n,8)
	C start two new lsh
	mov	-8(up,n,8), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r10
	shl	R8(%rcx), %r11

	sub	$4, n
	jae	L(top)			C				      2
C Drain the pipeline: r10/r11 hold up[2] << cnt and up[1] << cnt; complete
C them with the right-shifted limbs below and store rp[2] and rp[1].
L(end):
	neg	R32(%rcx)		C put rsh count in cl
	mov	8(up), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	mov	(up), %r9
	shr	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, 16(rp)
	mov	%r11, 8(rp)

	neg	R32(%rcx)		C put lsh count in cl
C Lowest limb: nothing below it to shift in.
L(ast):	mov	(up), %r10
	shl	R8(%rcx), %r10
	mov	%r10, (rp)
	FUNC_EXIT()
	ret
EPILOGUE()