dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/bdiv_q_1.asm

dnl  x86 mpn_bdiv_q_1 -- mpn by limb exact division.

dnl  Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.

dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')


C     cycles/limb
C P54    30.0
C P55    29.0
C P6     13.0 odd divisor, 12.0 even (strangely)
C K6     14.0
C K7     12.0
C P4     42.0

C Target: 32-bit x86, AT&T operand order, arguments on the stack.
C
C Both entry points below divide the size-limb number at src by a single
C limb and store size quotient limbs at dst, working from the least
C significant limb upwards.  Each quotient limb is formed as
C
C	q = (s - carry) * inverse	mod 2^32
C
C where s is the next source limb after the right shift by the shift count,
C and the carry passed on to the following limb is the high half of
C q * divisor plus the borrow out of the subtraction.  No remainder is
C produced, so the output is the true quotient only for an exact division.
C
C Preconditions implied by the code (nothing is checked at run time):
C   - size is at least 1, since src[0] is loaded before size is tested.
C   - for mpn_bdiv_q_1 the divisor is nonzero, since L(strip_twos) shifts
C     right until a 1 bit falls out and that never happens for zero.
C
C Registers: ebx, esi, edi and ebp are saved and restored; eax, ecx, edx
C and the flags are clobbered.  On return eax holds the last quotient limb
C stored, which is dst[size-1].
C NOTE(review): whether any caller uses that return value is not visible
C here - confirm against the C prototypes.

MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)

C Stack arguments.  The numbers are byte offsets from the stack pointer at
C function entry, where FRAME is 0.  Each FRAME_pushl and FRAME_popl below
C records a stack pointer move so that these names stay usable.
C NOTE(review): this is the usual GMP defframe convention; the macro itself
C is defined outside this file - confirm there.
C mpn_bdiv_q_1 is called with only the first four arguments and never reads
C PARAM_INVERSE or PARAM_SHIFT.
defframe(PARAM_SHIFT,  24)
defframe(PARAM_INVERSE,20)
defframe(PARAM_DIVISOR,16)
defframe(PARAM_SIZE,   12)
defframe(PARAM_SRC,    8)
defframe(PARAM_DST,    4)

dnl  re-use parameter space
dnl  (the src pointer is copied to esi before this slot is overwritten)
define(VAR_INVERSE,`PARAM_SRC')

	TEXT

C mp_limb_t
C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
C		    mp_limb_t inverse, int shift)
C
C Same division as described above, with the inverse and the shift count
C supplied by the caller instead of being computed here.
C NOTE(review): presumably divisor must already be odd, with
C divisor * inverse == 1 mod 2^32, and shift is the count of twos removed
C from the real divisor - confirm against callers.

	ALIGN(16)
PROLOGUE(mpn_pi1_bdiv_q_1)
deflit(`FRAME',0)

	movl	PARAM_SHIFT, %ecx
	pushl	%ebp	FRAME_pushl()

	movl	PARAM_INVERSE, %eax
	movl	PARAM_SIZE, %ebp
	pushl	%ebx	FRAME_pushl()

	C Shared tail, also reached by the jmp at the end of mpn_bdiv_q_1.
	C State required here, established identically by both prologues:
	C   eax  inverse
	C   ecx  shift
	C   ebp  size
	C   stack: ebp then ebx already pushed
	C   PARAM_DIVISOR slot holds the divisor to multiply by
L(common):
	pushl	%edi	FRAME_pushl()
	pushl	%esi	FRAME_pushl()

	movl	PARAM_SRC, %esi
	movl	PARAM_DST, %edi

	C Point both pointers just past their arrays and count a negative
	C index up to zero, so that incl alone gives the loop exit test.
	leal	(%esi,%ebp,4), %esi	C src end
	leal	(%edi,%ebp,4), %edi	C dst end
	negl	%ebp			C -size

	movl	%eax, VAR_INVERSE
	movl	(%esi,%ebp,4), %eax	C src[0]

	xorl	%ebx, %ebx		C no carry bit yet
	xorl	%edx, %edx		C no carry limb yet

	incl	%ebp
	jz	L(one)			C size == 1, only the final step

	movl	(%esi,%ebp,4), %edx	C src[1]

	shrdl(	%cl, %edx, %eax)

	movl	VAR_INVERSE, %edx
	jmp	L(entry)


	ALIGN(8)
	nop	C k6 code alignment
	nop
L(top):
	C eax	q
	C ebx	carry bit, 0 or -1
	C ecx	shift
	C edx	carry limb
	C esi	src end
	C edi	dst end
	C ebp	counter, limbs, negative

	movl	-4(%esi,%ebp,4), %eax
	subl	%ebx, %edx		C accumulate carry bit

	movl	(%esi,%ebp,4), %ebx

	shrdl(	%cl, %ebx, %eax)

	subl	%edx, %eax		C apply carry limb
	movl	VAR_INVERSE, %edx	C movl leaves the borrow flag intact

	sbbl	%ebx, %ebx		C ebx = 0 or -1 from that borrow

L(entry):
	imull	%edx, %eax		C q = limb * inverse, low 32 bits

	movl	%eax, -4(%edi,%ebp,4)
	movl	PARAM_DIVISOR, %edx

	mull	%edx			C edx = high half of q * divisor

	incl	%ebp
	jnz	L(top)


	C Final limb: nothing above it to shift in, so a plain shrl is used
	C and the two carries are applied separately.
	movl	-4(%esi), %eax		C src high limb
L(one):
	shrl	%cl, %eax
	popl	%esi	FRAME_popl()

	addl	%ebx, %eax		C apply carry bit

	subl	%edx, %eax		C apply carry limb

	imull	VAR_INVERSE, %eax

	movl	%eax, -4(%edi)

	C No stack argument is read after this point, so the remaining pops
	C carry no FRAME_popl annotation.
	popl	%edi
	popl	%ebx
	popl	%ebp

	ret

EPILOGUE()

C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                           mp_limb_t divisor);
C
C Computes what the shared tail needs and then jumps to L(common):
C   1. ecx = number of low zero bits of divisor, ebx = divisor with those
C      bits shifted out; ebx is written back to the PARAM_DIVISOR slot.
C   2. eax = inverse of ebx mod 2^32: an 8-bit value from
C      binvert_limb_table, indexed by bits 1 to 7 of ebx, refined by two
C      steps of inv = 2*inv - inv*inv*d.
C      NOTE(review): each step presumably doubles the number of correct low
C      bits, 8 to 16 to 32 - the ASSERT below checks the final result.
C The pushes of ebp and ebx match those in mpn_pi1_bdiv_q_1, so FRAME and
C the stack layout agree at L(common).

	ALIGN(16)
PROLOGUE(mpn_bdiv_q_1)
deflit(`FRAME',0)

	movl	PARAM_DIVISOR, %eax
	pushl	%ebp	FRAME_pushl()

	movl	$-1, %ecx		C shift count
	movl	PARAM_SIZE, %ebp

	pushl	%ebx	FRAME_pushl()

L(strip_twos):
	incl	%ecx

	shrl	%eax
	jnc	L(strip_twos)

	C The shift above dropped the lowest 1 bit as well, so eax is now
	C the odd part of d divided by two, rounded down.
	leal	1(%eax,%eax), %ebx	C d without twos
	andl	$127, %eax		C d/2, 7 bits

ifdef(`PIC',`
	LEA(	binvert_limb_table, %edx)
	movzbl	(%eax,%edx), %eax		C inv 8 bits
',`
	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
')

	leal	(%eax,%eax), %edx	C 2*inv
	movl	%ebx, PARAM_DIVISOR	C d without twos
	imull	%eax, %eax		C inv*inv
	imull	%ebx, %eax		C inv*inv*d
	subl	%eax, %edx		C inv = 2*inv - inv*inv*d

	leal	(%edx,%edx), %eax	C 2*inv
	imull	%edx, %edx		C inv*inv
	imull	%ebx, %edx		C inv*inv*d
	subl	%edx, %eax		C inv = 2*inv - inv*inv*d

	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
	pushl	%eax	FRAME_pushl()
	imull	PARAM_DIVISOR, %eax
	cmpl	$1, %eax
	popl	%eax	FRAME_popl()')

	jmp	L(common)
EPILOGUE()
ASM_END()