dnl  Copy taken from github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/atom/lshift.asm

dnl  Intel Atom mpn_lshift -- mpn left shift.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                       unsigned cnt);
C
C Shifts the size-limb operand at src left by cnt bits and stores the size
C result limbs at dst.  On return eax holds the bits shifted out of the most
C significant limb.
C
C Syntax is AT&T (source operand first), 32-bit x86, with arguments read
C from the stack through the PARAM_* frame macros defined below.
C
C Register roles: rp is edi (destination pointer), up is esi (source
C pointer), cnt is ecx (shift count, used through cl).  esi and edi are
C always saved and restored; ebx and ebp are saved and restored on the
C paths that modify them.  eax, ecx and edx are used as scratch.
C
C Two code paths:
C   L(normal)   any cnt; walks from the most significant limb downwards,
C               two limbs per loop iteration.
C   L(special)  cnt == 1 only; walks upwards, doubling each limb with an
C               add/adc carry chain, four limbs per loop iteration.
C
C NOTE(review): both paths load one source limb unconditionally, so
C size >= 1 is presumably required.  Shifts take their count from cl, of
C which x86 uses only the low 5 bits, so the negated count acts as 32-cnt;
C this presumably relies on 1 <= cnt <= 31.  Confirm both against the mpn
C interface documentation.

C                                 cycles/limb
C                               cnt!=1  cnt==1
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)
C P6 model 13 (Dothan)
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom                     5       2.5
C AMD K6
C AMD K7
C AMD K8
C AMD K10

defframe(PARAM_CNT, 16)
defframe(PARAM_SIZE,12)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)

dnl  re-use parameter space
dnl  Each incoming argument slot is recycled as a save area or local once the
dnl  argument itself has been loaded into a register.
define(SAVE_UP,`PARAM_CNT')
define(VAR_COUNT,`PARAM_SIZE')
define(SAVE_EBX,`PARAM_SRC')
define(SAVE_EBP,`PARAM_DST')

define(`rp',  `%edi')
define(`up',  `%esi')
define(`cnt',  `%ecx')

ASM_START()
        TEXT
        ALIGN(8)
deflit(`FRAME',0)
PROLOGUE(mpn_lshift)
        mov     PARAM_CNT, cnt          C ecx = shift count
        mov     PARAM_SIZE, %edx        C edx = size in limbs
        mov     up, SAVE_UP             C save esi in the cnt slot, cnt is already loaded
        mov     PARAM_SRC, up           C esi = src
        push    rp                      FRAME_pushl()
        mov     PARAM_DST, rp           C edi = dst

C We can use faster code for shift-by-1 under certain conditions.
C The fast path walks the operands upwards while the normal path walks them
C downwards.  It is selected only when src is not below dst, or when dst
C starts at or beyond the end of src; presumably this keeps overlapping
C operands safe.
C NOTE(review): the first compare is up against rp directly, so the branch
C is taken when up >= rp (unsigned); no extra limb of slack is applied.
        cmp     $1,cnt
        jne     L(normal)
        cmpl    rp, up
        jnc     L(special)              C jump if s_ptr >= res_ptr
        leal    (up,%edx,4),%eax
        cmpl    %eax,rp
        jnc     L(special)              C jump if res_ptr >= s_ptr + size

L(normal):
        lea     -4(up,%edx,4), up       C up = address of src[size-1]
        mov     %ebx, SAVE_EBX          C save ebx in the src slot, src is already loaded
        lea     -4(rp,%edx,4), rp       C rp = address of dst[size-1]

        shr     %edx                    C edx = size/2, CF = size & 1
        mov     (up), %eax              C eax = most significant source limb
        mov     %edx, VAR_COUNT         C loop counter lives in the size slot
        jnc     L(evn)                  C CF survives the two movs: even size

C Odd size.  ebx gets the top limb shifted left; eax gets the bits shifted
C out of it, which is the return value.
        mov     %eax, %ebx
        shl     %cl, %ebx
        neg     cnt                     C cl now acts as 32-cnt
        shr     %cl, %eax
        test    %edx, %edx
        jnz     L(gt1)                  C more than one limb
        mov     %ebx, (rp)              C size == 1: store the only result limb
        jmp     L(quit)

C Odd size greater than 1.  The return value is parked on the stack while
C the loop uses eax.  This push carries no FRAME_pushl; presumably that is
C because the code following textually, L(evn), is entered with only rp
C pushed.
L(gt1): mov     %ebp, SAVE_EBP          C save ebp in the dst slot
        push    %eax
        mov     -4(up), %eax            C next lower limb
        mov     %eax, %ebp              C keep a copy for its own left shift
        shr     %cl, %eax               C its high bits, cl is still 32-cnt
        jmp     L(lo1)

C Even size.  Load the two top limbs; eax becomes the return value.  rp is
C biased by one limb so that the stores at -4(rp) and -8(rp) in L(top) and
C L(end) address the current limb pair.
L(evn): mov     %ebp, SAVE_EBP          C save ebp in the dst slot
        neg     cnt                     C cl acts as 32-cnt
        mov     %eax, %ebp              C ebp = top limb, shifted left later
        mov     -4(up), %edx            C edx = second limb
        shr     %cl, %eax               C eax = bits shifted out of the top limb
        mov     %edx, %ebx              C ebx = copy of second limb
        shr     %cl, %edx               C high bits of second limb
        neg     cnt                     C cl back to cnt
        decl    VAR_COUNT
        lea     4(rp), rp
        lea     -4(up), up
        jz      L(end)                  C lea leaves flags alone: tests the decl, size == 2
        push    %eax                    FRAME_pushl()

C Main loop, two limbs per iteration, walking downwards.
C On entry to L(top): cl = cnt, ebp = higher limb still to be shifted left,
C edx = high bits of the next lower limb, ebx = copy of that lower limb.
C On entry to L(lo1): cl acts as 32-cnt, ebx = higher limb already shifted
C left, eax = high bits of the next lower limb, ebp = copy of that limb.
        ALIGN(8)
L(top): shl     %cl, %ebp
        or      %ebp, %edx              C edx = finished result limb
        shl     %cl, %ebx
        neg     cnt                     C cl acts as 32-cnt
        mov     -4(up), %eax
        mov     %eax, %ebp
        mov     %edx, -4(rp)
        shr     %cl, %eax
        lea     -8(rp), rp
L(lo1): mov     -8(up), %edx
        or      %ebx, %eax              C eax = finished result limb
        mov     %edx, %ebx
        shr     %cl, %edx
        lea     -8(up), up
        neg     cnt                     C cl back to cnt
        mov     %eax, (rp)
        decl    VAR_COUNT
        jg      L(top)

        pop     %eax                    FRAME_popl()
C Wind down: finish the last two result limbs.  The lowest limb receives
C only its own bits shifted left.
L(end):
        shl     %cl, %ebp
        shl     %cl, %ebx
        or      %ebp, %edx
        mov     SAVE_EBP, %ebp          C restore ebp
        mov     %edx, -4(rp)
        mov     %ebx, -8(rp)

L(quit):
        mov     SAVE_UP, up             C restore esi
        mov     SAVE_EBX, %ebx          C restore ebx
        pop     rp                      FRAME_popl()
        ret

C Shift by one bit, walking upwards.  Each limb is doubled with adc so the
C bit shifted out of one limb enters the next through CF.  The lea, mov and
C dec instructions used between the adc steps leave CF unchanged.
C eax counts loop passes and ends at zero, ecx and edx hold limbs in flight;
C the shift count in ecx is no longer needed.  ebx and ebp are not touched.
L(special):
deflit(`FRAME',4)
        lea     3(%edx), %eax           C size + 3
        dec     %edx                    C size - 1
        mov     (up), %ecx              C ecx = src[0]
        shr     $2, %eax                C (size + 3) / 4
        and     $3, %edx                C (size - 1) % 4, also clears CF
        jz      L(goloop)               C jump if size == 1 (mod 4)
        shr     %edx                    C CF = low bit of (size - 1) % 4
        jnc     L(odd)                  C jump if size == 3 (mod 4), CF is clear

C size == 0 or 2 (mod 4): handle one limb up front; add starts the carry
C chain.
        add     %ecx, %ecx
        lea     4(up), up
        mov     %ecx, (rp)
        mov     (up), %ecx
        lea     4(rp), rp

C NOTE(review): tracing edx = (size - 1) % 4 through the shr and this dec
C shows the branch is taken for size == 2 (mod 4); size == 0 (mod 4) falls
C through into L(odd).
        dec     %edx
        jnz     L(goloop)               C jump if size == 2 (mod 4)
L(odd): lea     -8(up), up              C bias pointers for the mid-loop entry
        lea     -8(rp), rp
        jmp     L(sentry)               C reached if size == 0 or 3 (mod 4)

L(sloop):
        adc     %ecx, %ecx
        mov     4(up), %edx
        mov     %ecx, (rp)
        adc     %edx, %edx
        mov     8(up), %ecx
        mov     %edx, 4(rp)
L(sentry):
        adc     %ecx, %ecx
        mov     12(up), %edx
        mov     %ecx, 8(rp)
        adc     %edx, %edx
        lea     16(up), up
        mov     %edx, 12(rp)
        lea     16(rp), rp
        mov     (up), %ecx              C preload the next limb
L(goloop):
        decl    %eax
        jnz     L(sloop)

C Last limb.  eax is zero here, so the final adc turns the carry out of the
C top limb into the return value.
L(squit):
        adc     %ecx, %ecx
        mov     %ecx, (rp)
        adc     %eax, %eax

        mov     SAVE_UP, up             C restore esi
        pop     rp                      FRAME_popl()
        ret
EPILOGUE()
ASM_END()