dnl  Source path: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/bdiv_q_1.asm

dnl  AMD64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by
dnl  1-limb divisor, returning quotient only.

dnl  Copyright 2001, 2002, 2004-2006, 2009, 2011, 2012 Free Software
dnl  Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	10
C AMD K10	10
C Intel P4	33
C Intel core2	13.25
C Intel corei	14
C Intel atom	42
C VIA nano	 ?
C INPUT PARAMETERS
define(`rp',   `%rdi')
define(`up',   `%rsi')
define(`n',    `%rdx')
define(`d',    `%rcx')
define(`di',   `%r8')		C	just mpn_pi1_bdiv_q_1
define(`ncnt', `%r9')		C	just mpn_pi1_bdiv_q_1

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)

C ---------------------------------------------------------------------------
C mpn_bdiv_q_1 -- Hensel (2-adic) quotient of {up,n} by the single limb d.
C
C In:	rp (%rdi)  destination, n limbs are written
C	up (%rsi)  source, n limbs are read
C	n  (%rdx)  limb count; must be >= 1 (the shared tail decrements the
C		   count before testing it, so n == 0 is not handled)
C	d  (%rcx)  divisor; must be non-zero (bsf is undefined for 0)
C
C This entry point only prepares the values the shared loop needs and then
C jumps to L(com) inside mpn_pi1_bdiv_q_1:
C	%rcx  ncnt = number of trailing zero bits removed from d (0 if d odd)
C	%r11  d with those zero bits shifted out (odd)
C	%r8   inverse of the odd d modulo 2^64: an 8-bit seed is fetched from
C	      binvert_limb_table and refined by three Newton steps
C	      inv = 2*inv - inv*inv*d (8 -> 16 -> 32 -> 64 bits)
C	%r10  n
C
C %rbx is pushed here and popped by the shared tail, mirroring the push in
C mpn_pi1_bdiv_q_1.  FUNC_ENTRY/FUNC_EXIT are defined outside this file;
C presumably they adapt the DOS64 argument registers -- confirm in the
C x86_64 m4 definitions.
C ---------------------------------------------------------------------------
PROLOGUE(mpn_bdiv_q_1)
	FUNC_ENTRY(4)
	push	%rbx

	mov	%rcx, %rax		C working copy of d
	xor	R32(%rcx), R32(%rcx)	C ncnt count
	mov	%rdx, %r10		C keep n; rdx is reused below

	bt	$0, R32(%rax)
	jnc	L(evn)			C skip bsfq unless divisor is even

L(odd):	mov	%rax, %rbx		C rbx = odd d, used by the Newton steps
	shr	R32(%rax)
	and	$127, R32(%rax)		C d/2, 7 bits

	LEA(	binvert_limb_table, %rdx)

	movzbl	(%rdx,%rax), R32(%rax)	C inv 8 bits

	mov	%rbx, %r11		C d without twos

	lea	(%rax,%rax), R32(%rdx)	C 2*inv
	imul	R32(%rax), R32(%rax)	C inv*inv
	imul	R32(%rbx), R32(%rax)	C inv*inv*d
	sub	R32(%rax), R32(%rdx)	C inv = 2*inv - inv*inv*d, 16 bits

	lea	(%rdx,%rdx), R32(%rax)	C 2*inv
	imul	R32(%rdx), R32(%rdx)	C inv*inv
	imul	R32(%rbx), R32(%rdx)	C inv*inv*d
	sub	R32(%rdx), R32(%rax)	C inv = 2*inv - inv*inv*d, 32 bits

	lea	(%rax,%rax), %r8	C 2*inv
	imul	%rax, %rax		C inv*inv
	imul	%rbx, %rax		C inv*inv*d
	sub	%rax, %r8		C inv = 2*inv - inv*inv*d, 64 bits

	jmp	L(com)			C continue in the shared division code

L(evn):	bsf	%rax, %rcx		C ncnt = index of lowest set bit of d
	shr	R8(%rcx), %rax		C strip the trailing zero bits
	jmp	L(odd)
EPILOGUE()

C ---------------------------------------------------------------------------
C mpn_pi1_bdiv_q_1 -- same division, with the inverse and shift count
C supplied by the caller.
C
C In:	rp (%rdi), up (%rsi), n (%rdx) as for mpn_bdiv_q_1
C	d    (%rcx)  divisor as used in the multiply; presumably already odd
C		     (twos stripped by the caller) -- nothing here checks it
C	di   (%r8)   presumably the inverse of d modulo 2^64 -- not checked
C	ncnt (%r9)   right-shift count applied to the source operand
C		     (on DOS64 di and ncnt are loaded from the stack)
C
C Per limb the code forms  q = (u_shifted - carry_bit - carry_limb) * di,
C stores q, and takes the high half of q*d as the next carry limb.
C %rax is left holding the last quotient limb; no separate return value is
C set up in this file.
C ---------------------------------------------------------------------------
PROLOGUE(mpn_pi1_bdiv_q_1)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8	')
IFDOS(`	mov	64(%rsp), %r9	')
	push	%rbx

	mov	%rcx, %r11		C d
	mov	%rdx, %r10		C n
	mov	%r9, %rcx		C ncnt

C Shared entry: r8 = inverse, r10 = n, r11 = d, rcx = ncnt, rbx saved on stack
L(com):	mov	(up), %rax		C up[0]

	dec	%r10
	jz	L(one)			C single-limb operand

	mov	8(up), %rdx		C up[1]
	lea	(up,%r10,8), up		C up end
	lea	(rp,%r10,8), rp		C rp end
	neg	%r10			C -n

	shrd	R8(%rcx), %rdx, %rax	C low limb of source >> ncnt

	xor	R32(%rbx), R32(%rbx)	C no carry bit yet
	jmp	L(ent)

	ALIGN(8)
L(top):
	C rax	q
	C rbx	carry bit, 0 or 1
	C rcx	ncnt
	C rdx	carry limb (high half of q*d) once the mul below has run
	C r8	inverse of d
	C r10	counter, limbs, negative
	C r11	d

	mul	%r11			C carry limb in rdx
	mov	(up,%r10,8), %rax
	mov	8(up,%r10,8), %r9
	shrd	R8(%rcx), %r9, %rax	C next source limb >> ncnt
	nop
	sub	%rbx, %rax		C apply carry bit
	setc	R8(%rbx)
	sub	%rdx, %rax		C apply carry limb
	adc	$0, %rbx		C fold second borrow into the carry bit
L(ent):	imul	%r8, %rax		C q = limb * inverse
	mov	%rax, (rp,%r10,8)
	inc	%r10
	jnz	L(top)

	C Most significant limb: no higher limb to shift in, so plain shr
	mul	%r11			C carry limb in rdx
	mov	(up), %rax		C up high limb
	shr	R8(%rcx), %rax
	sub	%rbx, %rax		C apply carry bit
	sub	%rdx, %rax		C apply carry limb
	imul	%r8, %rax
	mov	%rax, (rp)
	pop	%rbx
	FUNC_EXIT()
	ret

L(one):	shr	R8(%rcx), %rax		C n == 1: shift, multiply, store
	imul	%r8, %rax
	mov	%rax, (rp)
	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()