dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/divrem_2.asm

dnl  x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.

dnl  Copyright 2007, 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C NOTE(review): each row below carries at most one figure; it is shown in the
C norm column on the assumption that this is where it belongs -- confirm
C against the upstream file.
C		norm	frac
C 486
C P5
C P6-13		29.2
C P6-15		*26
C K6
C K7		22
C K8		*19
C P4-f1
C P4-f2		*65
C P4-f3
C P4-f4		*72

C A star means numbers not updated for the latest version of the code.


C TODO
C  * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0.
C  * The loop has not been carefully tuned.  We should at the very least do
C    some local insn swapping.
C  * The code outside the main loop is what gcc generated.  Clean up!
C  * Clean up stack slot usage.
C INPUT PARAMETERS
C qp
C fn
C up_param
C un_param
C dp
C
C All arguments are read from the stack.  After the four register pushes and
C the 36-byte local frame set up below, they are found at:
C
C	56(%esp) qp	60(%esp) fn	64(%esp) up	68(%esp) un	72(%esp) dp
C
C Result: eax returns the msl slot (32(%esp)), which is either 0 or 1.  On
C exit the two remainder limbs are written to up[0] (low) and up[1] (high),
C and one quotient limb per loop iteration has been written to qp[].
C
C Local stack slots:
C	 0(%esp)  scratch during set-up, q0 inside the loop
C	 4(%esp)  -d1
C	12(%esp)  address of up[un-3]
C	16(%esp)  di, the reciprocal used by the loop
C	20(%esp)  d0 = dp[0]
C	24(%esp)  d1 = dp[1]
C	32(%esp)  msl, the return value


C eax ebx ecx edx esi edi ebp
C         cnt         qp
C NOTE(review): the column positions of cnt and qp in the table above are a
C best guess, since the original spacing was lost.  The register and slot map
C placed just ahead of the main loop is the one that matches the code.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_divrem_2)
	push	%ebp
	push	%edi
	push	%esi
	push	%ebx
	sub	$36, %esp		C local frame, slots 0..32
	mov	68(%esp), %ecx		C un
	mov	72(%esp), %esi		C dp
	movl	$0, 32(%esp)		C msl = 0
	lea	0(,%ecx,4), %edi
	add	64(%esp), %edi		C up
	mov	(%esi), %ebx
	mov	4(%esi), %eax
	mov	%ebx, 20(%esp)		C d0 = dp[0]
	sub	$12, %edi		C edi = address of up[un-3]
	mov	%eax, 24(%esp)		C d1 = dp[1]
	mov	%edi, 12(%esp)		C keep limb pointer for the loop
	mov	8(%edi), %ebx		C ebx = up[un-1]
	mov	4(%edi), %ebp		C ebp = up[un-2]
	cmp	%eax, %ebx
	jb	L(8)			C up[un-1] < d1, no initial subtract
	seta	%dl
	cmp	20(%esp), %ebp
	setae	%al
	orb	%dl, %al		C "orb" form to placate Sun tools
	jne	L(35)			C top two limbs >= divisor
L(8):
	mov	60(%esp), %esi		C fn
	lea	-3(%esi,%ecx), %edi	C edi = un + fn - 3
	test	%edi, %edi
	js	L(9)			C negative, so no quotient limbs
C Compute di, the reciprocal the loop multiplies by.  A first value is taken
C from a division by d1 alone, then it is stepped down in L(36) using d0.
	mov	24(%esp), %edx
	mov	$-1, %esi
	mov	%esi, %eax
	mov	%esi, %ecx
	not	%edx
	divl	24(%esp)		C eax = (~d1 : 0xffffffff) / d1
	mov	%eax, %esi		C candidate di
	imul	24(%esp), %eax
	mov	%eax, (%esp)		C low limb of di*d1, parked in slot 0
	mov	%esi, %eax
	mull	20(%esp)		C edx:eax = di * d0
	mov	(%esp), %eax
	add	20(%esp), %eax
	adc	$0, %ecx
	add	%eax, %edx
	adc	$0, %ecx
	mov	%ecx, %eax
	js	L(32)			C sign flag is still that of the adc
L(36):	dec	%esi			C di--
	sub	24(%esp), %edx
	sbb	$0, %eax
	jns	L(36)
L(32):
	mov	%esi, 16(%esp)		C di
	mov	%edi, %ecx		C un
	mov	12(%esp), %esi		C up
	mov	24(%esp), %eax
	neg	%eax
	mov	%eax, 4(%esp)		C -d1
	ALIGN(16)
	nop

C eax ebx ecx edx esi edi ebp  0    4   8   12  16  20  24  28  32   56  60
C     n2  un      up      n1   q0  -d1          di  d0  d1      msl  qp  fn

C Each pass produces one quotient limb and stores it at qp[ecx]; ecx counts
C down and the loop ends once it goes negative.
L(loop):
	mov	16(%esp), %eax		C di
	mul	%ebx
	add	%ebp, %eax
	mov	%eax, (%esp)		C q0
	adc	%ebx, %edx
	mov	%edx, %edi		C q
	imul	4(%esp), %edx
	mov	20(%esp), %eax
	lea	(%edx, %ebp), %ebx	C n1 -= ...
	mul	%edi
	xor	%ebp, %ebp		C next low limb defaults to 0
	cmp	60(%esp), %ecx
	jl	L(19)			C counter below fn, keep the 0
	mov	(%esi), %ebp		C otherwise take the next limb of up
	sub	$4, %esi
L(19):	sub	20(%esp), %ebp
	sbb	24(%esp), %ebx
	sub	%eax, %ebp
	sbb	%edx, %ebx
	mov	20(%esp), %eax		C d0
	inc	%edi
	xor	%edx, %edx
	cmp	(%esp), %ebx
	adc	$-1, %edx		C mask: 0 if ebx < q0, else -1
	add	%edx, %edi		C q--
	and	%edx, %eax		C d0 or 0
	and	24(%esp), %edx		C d1 or 0
	add	%eax, %ebp
	adc	%edx, %ebx
	cmp	24(%esp), %ebx
	jae	L(fix)			C flags are consumed at L(fix)
L(bck):	mov	56(%esp), %edx		C qp
	mov	%edi, (%edx, %ecx, 4)	C qp[ecx] = q
	dec	%ecx
	jns	L(loop)

C Store the remainder, fetch the return value and unwind the frame.
L(9):	mov	64(%esp), %esi		C up
	mov	%ebp, (%esi)
	mov	%ebx, 4(%esi)
	mov	32(%esp), %eax		C return msl
	add	$36, %esp
	pop	%ebx
	pop	%esi
	pop	%edi
	pop	%ebp
	ret

C Reached with ebx >= d1.  If the remainder ebx:ebp is still >= d1:d0 then
C bump q and subtract the divisor once more.
L(fix):	seta	%dl
	cmp	20(%esp), %ebp
	setae	%al
	orb	%dl, %al		C "orb" form to placate Sun tools
	je	L(bck)
	inc	%edi
	sub	20(%esp), %ebp
	sbb	24(%esp), %ebx
	jmp	L(bck)

C Top two limbs of the dividend were >= the divisor: subtract it once and
C record that in msl.
L(35):	sub	20(%esp), %ebp
	sbb	24(%esp), %ebx
	movl	$1, 32(%esp)
	jmp	L(8)
EPILOGUE()