dnl  x86-64 mpn_div_qr_2n_pi1
dnl  -- Divide an mpn number by a normalized 2-limb number,
dnl     using a single-limb inverse.

dnl  Copyright 2007, 2008, 2010-2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C		c/l

C qh = mpn_div_qr_2n_pi1 (qp, rp, up, un, d1, d0, di)
C
C Divides the un-limb number at up by the 2-limb divisor (d1,d0), where a
C limb is one 8-byte word.
C
C   qp	receives the low quotient limbs, qp[0] ... qp[un-3]
C   rp	receives the 2-limb remainder, rp[0] (low) and rp[1] (high)
C   up	dividend limbs, least significant first; only read
C   un	number of dividend limbs; the code reads up[un-2] and up[un-1]
C	unconditionally, so un must be at least 2
C   d1	high divisor limb (the divisor is normalized, per the title above)
C   d0	low divisor limb
C   di	the single-limb inverse of the divisor
C
C The return value in %rax is the most significant quotient limb qh, which
C is either 0 or 1.

C INPUT PARAMETERS
define(`qp',		`%rdi')
define(`rp',		`%rsi')
define(`up_param',	`%rdx')
define(`un',		`%rcx')
define(`d1',		`%r8')
define(`d0',		`%r9')
define(`di_param',	`8(%rsp)')

C WORKING REGISTERS
define(`di',		`%r10')		C inverse, loaded from the stack
define(`up',		`%r11')		C copy of up_param; mul clobbers %rdx
define(`u2',		`%rbx')		C high limb of the running remainder
define(`u1',		`%r12')		C low limb of the running remainder
define(`t1',		`%r13')		C quotient limb candidate
define(`t0',		`%r14')		C q0, the low half of the estimate
define(`md1',		`%r15')		C -d1

C TODO
C * Store qh in the same stack slot as di_param, instead of pushing
C   it. (we could put it in register %rbp, but then we would need to
C   save and restore that instead, which doesn't seem like a win).

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_div_qr_2n_pi1)
	C NOTE(review): FUNC_ENTRY and FUNC_EXIT are defined outside this
	C file; presumably they remap the four register arguments for the
	C DOS64 ABI.  The IFDOS lines fetch d1 and d0 from the stack and
	C move the di slot accordingly.
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8	')
IFDOS(`	mov	64(%rsp), %r9	')
IFDOS(`define(`di_param', `72(%rsp)')')
	C Fetch di before any push, while the di_param offset is still valid.
	mov	di_param, di
	mov	up_param, up
	push	%r15
	push	%r14
	push	%r13
	push	%r12
	push	%rbx

	C (u2,u1) = the two most significant dividend limbs
	mov	-16(up, un, 8), u1
	mov	-8(up, un, 8), u2

	C (t1,t0) = (u2,u1) - (d1,d0).  Keep the difference when there was
	C no borrow, i.e. when the top limbs were >= the divisor.
	mov	u1, t0
	mov	u2, t1
	sub	d0, t0
	sbb	d1, t1
	cmovnc  t0, u1
	cmovnc	t1, u2
	C push qh which is !carry
	C (cmov leaves the flags alone, so CF is still the borrow from above)
	sbb	%rax, %rax
	inc	%rax
	push	%rax
	lea	-2(un), un		C un = number of quotient limbs left
	mov	d1, md1
	neg	md1

	jmp	L(next)

	ALIGN(16)
L(loop):
	C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
	C Based on the optimized divrem_2.asm code.

	mov	di, %rax
	mul	u2			C %rdx:%rax = di * u2
	mov	u1, t0
	add	%rax, t0	C q0 in t0
	adc	u2, %rdx		C q = high(di * u2) + u2 + carry
	mov	%rdx, t1	C q in t1
	imul	md1, %rdx		C low limb of -d1 * q
	mov	d0, %rax
	lea	(%rdx, u1), u2		C u2 = u1 - d1 * q  (mod 2^64)
	mul	t1			C %rdx:%rax = d0 * q
	mov	(up, un, 8), u1		C n0 = next dividend limb
	C (u2,u1) = (old u1, n0) - q * d - d, i.e. the remainder that goes
	C with the quotient candidate q + 1.
	sub	d0, u1
	sbb	d1, u2
	sub	%rax, u1
	sbb	%rdx, u2
	C Clear the add-back mask before the compare; xor would destroy CF.
	xor	R32(%rax), R32(%rax)
	xor	R32(%rdx), R32(%rdx)
	cmp	t0, u2			C CF = (u2 < q0)
	C No carry: q + 1 was too large, so keep q and add d back.
	C Carry: the mask stays zero and the adc bumps q to q + 1.
	cmovnc	d0, %rax
	cmovnc	d1, %rdx
	adc	$0, t1
	C NOTE(review): the nop has no architectural effect; presumably it
	C is here for alignment or scheduling.
	nop
	add	%rax, u1
	adc	%rdx, u2
	cmp	d1, u2
	jae	L(fix)
L(bck):
	mov	t1, (qp, un, 8)		C store quotient limb qp[un]
L(next):
	sub	$1, un
	jnc	L(loop)			C loop until un goes below zero
L(end):
	mov	u2, 8(rp)
	mov	u1, (rp)

	C qh on stack
	pop	%rax

	pop	%rbx
	pop	%r12
	pop	%r13
	pop	%r14
	pop	%r15
	FUNC_EXIT()
	ret

L(fix):	C Unlikely update. u2 >= d1
	C A further correction is needed when (u2,u1) >= (d1,d0), that is
	C when u2 > d1, or u2 == d1 and u1 >= d0.
	seta	%dl
	cmp	d0, u1
	setae	%al
	orb	%dl, %al		C "orb" form to placate Sun tools
	je	L(bck)
	inc	t1
	sub	d0, u1
	sbb	d1, u2
	jmp	L(bck)
EPILOGUE()