dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/divrem_1.asm

dnl  x86-64 mpn_divrem_1 -- mpn by limb division.

dnl  Copyright 2004, 2005, 2007-2012, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')


C	     norm	unorm	frac
C AMD K8,K9	13	13	12
C AMD K10	13	13	12
C Intel P4	43	44	43
C Intel core2	24.5	24.5	19.5
C Intel corei	20.5	19.5	18
C Intel atom	43	46	36
C VIA nano	25.5	25.5	24

C mp_limb_t
C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
C               mp_srcptr np, mp_size_t nn, mp_limb_t d)

C mp_limb_t
C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
C                      mp_srcptr np, mp_size_t nn, mp_limb_t d,
C                      mp_limb_t dinv, int cnt)

C OVERVIEW
C Both entry points divide the nn-limb number at np by the single limb d and
C then continue the division for fn further limbs with zero numerator limbs.
C nn + fn quotient limbs are stored downwards from qp[nn+fn-1] and the value
C left in rax at L(ret) is the function result.  Every quotient limb is
C produced with a multiplication by dinv instead of a divide instruction.
C
C mpn_divrem_1 obtains dinv by calling mpn_invert_limb.  mpn_preinv_divrem_1
C takes dinv and the shift count from its caller and jumps straight into the
C loops of mpn_divrem_1, so the two functions share every label below.
C
C Three loops are used:
C   L(ntop)  integer limbs, d has its top bit set (no shifting needed)
C   L(utop)  integer limbs, d shifted left by cnt, numerator limbs shifted
C            on the fly
C   L(ftop)  the fn limbs below the integer part

C INPUT PARAMETERS
define(`qp',		`%rdi')
define(`fn_param',	`%rsi')
define(`up_param',	`%rdx')
define(`un_param',	`%rcx')
define(`d',		`%r8')
define(`dinv',		`%r9')		C only for mpn_preinv_divrem_1
C       shift passed on stack		C only for mpn_preinv_divrem_1

C WORKING REGISTERS
C The incoming fn, np and nn are moved out of rsi, rdx and rcx in the
C prologues, since rdx is written by mul and rcx holds the shift count.
define(`cnt',		`%rcx')
define(`up',		`%rsi')
define(`fn',		`%r12')
define(`un',		`%rbx')


C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
C         cnt         qp      d  dinv

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C Offset of the stack-passed cnt argument from rsp once the four pushes of
C the prologue have been done.  For STD64 that is 8 (return address) + 32.
C NOTE(review): the DOS64 value also depends on what FUNC_ENTRY pushes, which
C is defined outside this file.
IFSTD(`define(`CNTOFF',		`40($1)')')
IFDOS(`define(`CNTOFF',		`104($1)')')

ASM_START()
	TEXT
	ALIGN(16)
C mpn_preinv_divrem_1
C Entry point for callers that already have the inverse (dinv, r9) and the
C normalization shift (cnt, on the stack).  Sets up the same register state
C as mpn_divrem_1 and then joins its loops.
PROLOGUE(mpn_preinv_divrem_1)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8	')
IFDOS(`	mov	64(%rsp), %r9	')
	xor	R32(%rax), R32(%rax)	C running remainder starts at 0
	push	%r13
	push	%r12
	push	%rbp
	push	%rbx

	mov	fn_param, fn
	mov	un_param, un
	add	fn_param, un_param	C un_param = un + fn
	mov	up_param, up

	lea	-8(qp,un_param,8), qp	C qp = &qp[un+fn-1], top quotient limb

	test	d, d
	js	L(nent)			C top bit of d set: normalized loop

C d does not have its top bit set: fetch cnt and shift d left by it.
C NOTE(review): L(uent) reads up[un-1] unconditionally, so this path
C presumably requires nn >= 1 -- confirm against the callers.
	mov	CNTOFF(%rsp), R8(cnt)
	shl	R8(cnt), d
	jmp	L(uent)
EPILOGUE()

	ALIGN(16)
C mpn_divrem_1
C In:   qp (rdi), fn (rsi), np (rdx), nn (rcx), d (r8; loaded from the stack
C       for DOS64).
C Out:  rax; nn + fn limbs written at qp.
C Saves and restores r13, r12, rbp and rbx.  Calls mpn_invert_limb once.
PROLOGUE(mpn_divrem_1)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8	')
	xor	R32(%rax), R32(%rax)	C running remainder starts at 0
	push	%r13
	push	%r12
	push	%rbp
	push	%rbx

	mov	fn_param, fn
	mov	un_param, un
	add	fn_param, un_param	C un_param = un + fn, sets ZF
	mov	up_param, up		C mov leaves the flags alone
	je	L(ret)			C un + fn == 0: nothing to do, rax = 0

	lea	-8(qp,un_param,8), qp	C qp = &qp[un+fn-1], top quotient limb
	xor	R32(%rbp), R32(%rbp)

	test	d, d
	jns	L(unnormalized)

C ---------------------------------------------------------------------------
C d has its top bit set.
C The top numerator limb is handled with one compare and subtract: its
C quotient limb is 1 when the limb is >= d and 0 otherwise.
L(normalized):
	test	un, un
	je	L(8)			C un == 0
	mov	-8(up,un,8), %rbp	C rbp = top numerator limb
	dec	un
	mov	%rbp, %rax
	sub	d, %rbp			C CF set when the limb is below d
	cmovc	%rax, %rbp		C ... in which case keep the limb itself
	sbb	R32(%rax), R32(%rax)	C eax = -CF
	inc	R32(%rax)		C eax = 1 - CF
	mov	%rax, (qp)
	lea	-8(qp), qp
L(8):
C dinv = mpn_invert_limb(d).  qp, up and d live in registers that the callee
C may overwrite, so they are saved around the call.  r9 needs no saving as
C dinv is only assigned afterwards.
C NOTE(review): ASSERT is defined outside this file; presumably it verifies
C the stack alignment at the call in assert-enabled builds.
IFSTD(`	push	%rdi		')
IFSTD(`	push	%rsi		')
	push	%r8
IFSTD(`	mov	d, %rdi		')
IFDOS(`	sub	$32, %rsp	')
IFDOS(`	mov	d, %rcx		')
	ASSERT(nz, `test $15, %rsp')
	CALL(	mpn_invert_limb)
IFDOS(`	add	$32, %rsp	')
	pop	%r8
IFSTD(`	pop	%rsi		')
IFSTD(`	pop	%rdi		')

	mov	%rax, dinv
	mov	%rbp, %rax		C rax = running remainder
	jmp	L(nent)

C Normalized integer loop, one quotient limb per pass.
C   on entry  rax = remainder so far, rbp = rax + 1
C   r10       next numerator limb up[un]
C   rdx:rax   rax * dinv, then rbp:r10 is added to it
C   r13       candidate quotient limb (high half of that sum)
C   rbp       low half of that sum, used for the adjustment test
C   r10       numerator limb - candidate * d
C   rax       r10 when r10 < rbp, otherwise r10 + d with r13 decremented
C             (cmovc and adc both consume the carry of the cmp)
C A result still >= d takes the L(nfx) fix-up.
	ALIGN(16)
L(ntop):mov	(up,un,8), %r10		C	    K8-K10  P6-CNR P6-NHM  P4
	mul	dinv			C	      0,13   0,20   0,18   0,45
	add	%r10, %rax		C	      4      8      3     12
	adc	%rbp, %rdx		C	      5      9     10     13
	mov	%rax, %rbp		C	      5      9      4     13
	mov	%rdx, %r13		C	      6     11     12     23
	imul	d, %rdx			C	      6     11     11     23
	sub	%rdx, %r10		C	     10     16     14     33
	mov	d, %rax			C
	add	%r10, %rax		C	     11     17     15     34
	cmp	%rbp, %r10		C	     11     17     15     34
	cmovc	%r10, %rax		C	     12     18     16     35
	adc	$-1, %r13		C
	cmp	d, %rax			C
	jae	L(nfx)			C
L(nok):	mov	%r13, (qp)		C
	sub	$8, qp			C
L(nent):lea	1(%rax), %rbp		C
	dec	un			C
	jns	L(ntop)			C

	xor	R32(%rcx), R32(%rcx)	C cnt = 0: final shr leaves rax as is
	jmp	L(frac)

C Fix-up for L(ntop): remainder was still >= d.
L(nfx):	sub	d, %rax
	inc	%r13
	jmp	L(nok)

C ---------------------------------------------------------------------------
C d does not have its top bit set.
C When the top numerator limb is below d its quotient limb is zero, so that
C limb is stored directly and the numerator limb becomes the initial
C remainder in rbp (rbp is 0 here from the prologue).
L(unnormalized):
	test	un, un
	je	L(44)
	mov	-8(up,un,8), %rax
	cmp	d, %rax
	jae	L(44)
	mov	%rbp, (qp)		C top quotient limb = 0
	mov	%rax, %rbp		C remainder = top numerator limb
	lea	-8(qp), qp
C NOTE(review): mov and lea do not change the flags, so ZF here is still the
C result of the cmp above, and falling through the jae means the operands
C were not equal.  This branch therefore looks unreachable; the general path
C below appears to handle that case anyway.
	je	L(ret)
	dec	un
L(44):
C cnt = 63 - (index of the highest set bit of d), taken from the low bits of
C the complemented bsr result; d and the partial remainder are shifted up by
C that amount.
C NOTE(review): bsr leaves its destination undefined for d == 0; presumably
C callers never pass a zero divisor.
	bsr	d, %rcx
	not	R32(%rcx)
	shl	R8(%rcx), d
	shl	R8(%rcx), %rbp

C dinv = mpn_invert_limb(shifted d).  cnt (rcx) must survive the call as
C well.  For STD64 the four pushes are followed by an extra 8 byte
C adjustment; for DOS64 40 bytes are reserved.
	push	%rcx
IFSTD(`	push	%rdi		')
IFSTD(`	push	%rsi		')
	push	%r8
IFSTD(`	sub	$8, %rsp	')
IFSTD(`	mov	d, %rdi		')
IFDOS(`	sub	$40, %rsp	')
IFDOS(`	mov	d, %rcx		')
	ASSERT(nz, `test $15, %rsp')
	CALL(	mpn_invert_limb)
IFSTD(`	add	$8, %rsp	')
IFDOS(`	add	$40, %rsp	')
	pop	%r8
IFSTD(`	pop	%rsi		')
IFSTD(`	pop	%rdi		')
	pop	%rcx

	mov	%rax, dinv
	mov	%rbp, %rax		C rax = shifted remainder
	test	un, un
	je	L(frac)			C no integer limbs left

C Shared entry (also reached from mpn_preinv_divrem_1): fold the bits that
C the top remaining numerator limb contributes above the shift into rax.
C neg turns cl into the complementary shift count and back again.
L(uent):dec	un
	mov	(up,un,8), %rbp
	neg	R32(%rcx)
	shr	R8(%rcx), %rbp
	neg	R32(%rcx)
	or	%rbp, %rax
	jmp	L(ent)

C Unnormalized integer loop.
C   rbp  numerator limb loaded on the previous pass (at L(ent))
C   r10  the limb below it
C The first six instructions build the shifted numerator limb
C   rbp = (rbp << cnt) | (r10 >> complementary count)
C and the remainder of the body is the same division step as in L(ntop),
C with r11 = remainder + 1, r13 = quotient limb and rax = new remainder.
	ALIGN(16)
L(utop):mov	(up,un,8), %r10
	shl	R8(%rcx), %rbp
	neg	R32(%rcx)
	shr	R8(%rcx), %r10
	neg	R32(%rcx)
	or	%r10, %rbp
	mul	dinv
	add	%rbp, %rax
	adc	%r11, %rdx
	mov	%rax, %r11
	mov	%rdx, %r13
	imul	d, %rdx
	sub	%rdx, %rbp
	mov	d, %rax
	add	%rbp, %rax
	cmp	%r11, %rbp
	cmovc	%rbp, %rax
	adc	$-1, %r13
	cmp	d, %rax
	jae	L(ufx)
L(uok):	mov	%r13, (qp)
	sub	$8, qp
L(ent):	mov	(up,un,8), %rbp
	dec	un
	lea	1(%rax), %r11		C lea keeps the flags of dec for jns
	jns	L(utop)

C Last integer limb: there is no lower limb to merge in, so the shifted limb
C is used on its own.  Otherwise identical to one pass of L(utop).
L(uend):shl	R8(%rcx), %rbp
	mul	dinv
	add	%rbp, %rax
	adc	%r11, %rdx
	mov	%rax, %r11
	mov	%rdx, %r13
	imul	d, %rdx
	sub	%rdx, %rbp
	mov	d, %rax
	add	%rbp, %rax
	cmp	%r11, %rbp
	cmovc	%rbp, %rax
	adc	$-1, %r13
	cmp	d, %rax
	jae	L(efx)
L(eok):	mov	%r13, (qp)
	sub	$8, qp
	jmp	L(frac)

C Fix-ups for L(utop) and L(uend): remainder was still >= d.
L(ufx):	sub	d, %rax
	inc	%r13
	jmp	L(uok)
L(efx):	sub	d, %rax
	inc	%r13
	jmp	L(eok)

C ---------------------------------------------------------------------------
C Fraction limbs: fn further quotient limbs with zero numerator limbs.
C rbp holds -d so that a single imul yields 0 - quotient * d.  The loop has
C no fix-up branch.
L(frac):mov	d, %rbp
	neg	%rbp
	jmp	L(fent)

	ALIGN(16)			C	    K8-K10  P6-CNR P6-NHM  P4
L(ftop):mul	dinv			C	      0,12   0,17   0,17
	add	%r11, %rdx		C	      5      8     10
	mov	%rax, %r11		C	      4      8      3
	mov	%rdx, %r13		C	      6      9     11
	imul	%rbp, %rdx		C	      6      9     11
	mov	d, %rax			C
	add	%rdx, %rax		C	     10     14     14
	cmp	%r11, %rdx		C	     10     14     14
	cmovc	%rdx, %rax		C	     11     15     15
	adc	$-1, %r13		C
	mov	%r13, (qp)		C
	sub	$8, qp			C
L(fent):lea	1(%rax), %r11		C
	dec	fn			C
	jns	L(ftop)			C

C Undo the normalization shift on the remainder (cnt is 0 on the normalized
C path) and return it in rax.
	shr	R8(%rcx), %rax
L(ret):	pop	%rbx
	pop	%rbp
	pop	%r12
	pop	%r13
	FUNC_EXIT()
	ret
EPILOGUE()