dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/atom/aorslshC_n.asm

dnl  Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C)

dnl  Contributed to the GNU project by Marco Bodrato.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C In-place entry points: dst[] = dst[] +- (src[] << LSH)
C
C mp_limb_t mpn_addlshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
C mp_limb_t mpn_addlshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C				mp_limb_t carry);
C mp_limb_t mpn_sublshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
C mp_limb_t mpn_sublshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C				mp_signed_limb_t borrow);

C Stack-frame offsets of the in-place arguments, in prototype order
C dst, src, size, carry-or-borrow.
defframe(PARAM_CORB,	16)
defframe(PARAM_SIZE,	12)
defframe(PARAM_SRC,	 8)
defframe(PARAM_DST,	 4)

C Three-operand entry points: dst[] = src1[] +- (src2[] << LSH)
C
C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C			   mp_size_t size);
C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C			    mp_size_t size, mp_limb_t carry);
C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C			   mp_size_t size);
C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C			    mp_size_t size, mp_limb_t borrow);

C if src1 == dst, _ip1 is used

C					cycles/limb
C				dst!=src1,src2	dst==src1
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)
C P6 model 13 (Dothan)
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom			 7		 6
C AMD K6
C AMD K7
C AMD K8
C AMD K10

C Stack-frame offsets of the trailing arguments of the three-operand entry
C points.  Their first two arguments (dst, src1) are read through PARAM_DST
C and PARAM_SRC.
defframe(GPARAM_CORB,	20)
defframe(GPARAM_SIZE,	16)
defframe(GPARAM_SRC2,	12)

dnl  re-use parameter space
dnl  Each argument slot is read into a register before it is overwritten with
dnl  a saved callee-saved register.
define(SAVE_EBP,`PARAM_SIZE')
define(SAVE_EBX,`PARAM_SRC')
define(SAVE_UP,`PARAM_DST')

C M = 2^LSH, used as the scale factor of lea so that lea (a,b,M) gives
C a + (b << LSH) without touching the flags.
define(M, eval(m4_lshift(1,LSH)))
define(`rp',  `%edi')
define(`up',  `%esi')

ASM_START()
	TEXT
	ALIGN(8)

C In-place entry with carry/borrow input.
C
C The loop adds the incoming value to the first shifted limb with lea, and
C keeps one extra bit in the carry flag.  Bit LSH of the argument is split off
C into edx and M4_opp (defined by the including file, presumably the opposite
C of M4_inst) adjusts ecx by it.
C
C The loop re-creates the carry flag from edx with add edx,edx, which sets
C carry only when edx is -1, whereas the wind-down code uses shr edx, which
C accepts 1 as well as -1.  Leaving edx as 0/1 therefore honoured the
C extracted bit only for size == 1; negating it gives the 0/-1 form that
C both places accept.
PROLOGUE(M4_ip_function_c)
deflit(`FRAME',0)
	movl	PARAM_CORB, %ecx
	movl	%ecx, %edx
	shr	$LSH, %edx
	andl	$1, %edx		C edx = bit LSH of the input
	M4_opp	%edx, %ecx
	neg	%edx			C 0/1 -> 0/-1, saved-carry form
	jmp	L(start_nc)
EPILOGUE()

C In-place entry without carry/borrow input, and the shared in-place body.
C
C Register use in the body:
C   rp   destination pointer, also the unshifted operand
C   up   pointer to the operand that is shifted left by LSH
C   ebx  loop counter, (size+1)/2; two limbs are handled per iteration
C   ecx  bits carried over from the previous source limb (limb >> RSH),
C        initially the carry/borrow input
C   edx  carry flag saved between iterations as 0 or -1
C   eax  result: bits shifted out of the last limb plus the final carry
C
C Only lea and mov are used between an operation that sets the carry flag
C and the M4_inst/adc that consumes it, since neither modifies the flags.
PROLOGUE(M4_ip_function)
deflit(`FRAME',0)

	xor	%ecx, %ecx		C no carry-in
	xor	%edx, %edx
L(start_nc):
	push	rp			FRAME_pushl()
	mov	PARAM_DST, rp
	mov	up, SAVE_UP
	mov	PARAM_SRC, up
	mov	%ebx, SAVE_EBX
	mov	PARAM_SIZE, %ebx	C size
L(inplace):
	C Also entered from the three-operand code when src1 == dst, with
	C rp, up, ebx, ecx and edx already set up.
	incl	%ebx			C size + 1
	shr	%ebx			C (size+1)\2
	mov	%ebp, SAVE_EBP
	jnc	L(entry)		C size odd

	add	%edx, %edx		C size even
	mov	%ecx, %ebp
	mov	(up), %ecx
	lea	-4(rp), rp		C bias rp, enteven stores at 4(rp)
	lea	(%ebp,%ecx,M), %eax
	lea	4(up), up
	jmp	L(enteven)

	ALIGN(16)
L(oop):
	lea	(%ecx,%eax,M), %ebp	C (limb << LSH) + bits from previous limb
	shr	$RSH, %eax
	mov	4(up), %ecx
	add	%edx, %edx		C restore carry flag from edx
	lea	8(up), up
	M4_inst	%ebp, (rp)
	lea	(%eax,%ecx,M), %eax

L(enteven):
	M4_inst	%eax, 4(rp)
	lea	8(rp), rp

	sbb	%edx, %edx		C save carry flag as 0 or -1
	shr	$RSH, %ecx

L(entry):
	mov	(up), %eax
	decl	%ebx
	jnz	L(oop)

	C Wind down: last limb, then restore registers and build the result.
	lea	(%ecx,%eax,M), %ebp
	shr	$RSH, %eax
	shr	%edx			C restore carry flag from bit 0 of edx
	M4_inst	%ebp, (rp)
	mov	SAVE_UP, up
	adc	$0, %eax		C shifted-out bits + final carry
	mov	SAVE_EBP, %ebp
	mov	SAVE_EBX, %ebx
	pop	rp			FRAME_popl()
	ret
EPILOGUE()

C Three-operand entry with carry/borrow input.  Same preparation of ecx and
C edx as the in-place variant, including the conversion of edx to 0/-1 so
C that add edx,edx in the loop reproduces the extracted bit as carry.
PROLOGUE(M4_function_c)
deflit(`FRAME',0)
	movl	GPARAM_CORB, %ecx
	movl	%ecx, %edx
	shr	$LSH, %edx
	andl	$1, %edx		C edx = bit LSH of the input
	M4_opp	%edx, %ecx
	neg	%edx			C 0/1 -> 0/-1, saved-carry form
	jmp	L(generic_nc)
EPILOGUE()

C Three-operand entry without carry/borrow input, and the shared body:
C dst[] = src1[] +- (src2[] << LSH).
C
C When src1 == dst the work is handed to the in-place loop at L(inplace).
C Otherwise the general loop below is used, with:
C   rp           destination pointer
C   ebx          pointer to src1, the operand that is not shifted
C   up           pointer to src2, the operand that is shifted left by LSH
C   GPARAM_SIZE  loop counter kept in its own argument slot, (size+1)/2
C   ecx          bits carried over from the previous src2 limb (limb >> RSH),
C                initially the carry/borrow input
C   edx          between iterations the saved carry flag as 0 or -1,
C                inside an iteration the src1 limb being combined
C   eax          result: bits shifted out of the last limb plus final carry
C
C Only lea and mov are used between an operation that sets the carry flag
C and the M4_inst/adc that consumes it, since neither modifies the flags.
PROLOGUE(M4_function)
deflit(`FRAME',0)

	xor	%ecx, %ecx		C no carry-in
	xor	%edx, %edx
L(generic_nc):
	push	rp			FRAME_pushl()
	mov	PARAM_DST, rp
	mov	up, SAVE_UP
	mov	PARAM_SRC, up		C src1
	cmp	rp, up
	mov	%ebx, SAVE_EBX
	jne	L(general)
	mov	GPARAM_SIZE, %ebx	C size
	mov	GPARAM_SRC2, up
	jmp	L(inplace)		C src1 == dst, use the in-place loop

L(general):
	C ebx was already stored in SAVE_EBX before the branch above.
	mov	GPARAM_SIZE, %eax	C size
	incl	%eax			C size + 1
	mov	up, %ebx		C ebx = src1
	mov	GPARAM_SRC2, up		C up = src2, read before SAVE_EBP reuses a slot
	shr	%eax			C (size+1)\2
	mov	%ebp, SAVE_EBP
	mov	%eax, GPARAM_SIZE
	jnc	L(entry2)		C size odd

	add	%edx, %edx		C size even
	mov	%ecx, %ebp
	mov	(up), %ecx
	lea	-4(rp), rp		C bias rp and ebx, enteven2 uses offset 4
	lea	-4(%ebx), %ebx
	lea	(%ebp,%ecx,M), %eax
	lea	4(up), up
	jmp	L(enteven2)

	ALIGN(16)
L(oop2):
	lea	(%ecx,%eax,M), %ebp	C (limb << LSH) + bits from previous limb
	shr	$RSH, %eax
	mov	4(up), %ecx
	add	%edx, %edx		C restore carry flag from edx
	lea	8(up), up
	mov	(%ebx), %edx		C src1 limb
	M4_inst	%ebp, %edx
	lea	(%eax,%ecx,M), %eax
	mov	%edx, (rp)
L(enteven2):
	mov	4(%ebx), %edx		C src1 limb
	lea	8(%ebx), %ebx
	M4_inst	%eax, %edx
	mov	%edx, 4(rp)
	sbb	%edx, %edx		C save carry flag as 0 or -1
	shr	$RSH, %ecx
	lea	8(rp), rp
L(entry2):
	mov	(up), %eax
	decl	GPARAM_SIZE
	jnz	L(oop2)

	C Wind down: last limb, then restore registers and build the result.
	lea	(%ecx,%eax,M), %ebp
	shr	$RSH, %eax
	shr	%edx			C restore carry flag from bit 0 of edx
	mov	(%ebx), %edx
	M4_inst	%ebp, %edx
	mov	%edx, (rp)
	mov	SAVE_UP, up
	adc	$0, %eax		C shifted-out bits + final carry
	mov	SAVE_EBP, %ebp
	mov	SAVE_EBX, %ebx
	pop	rp			FRAME_popl()
	ret
EPILOGUE()

ASM_END()