dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/k7/mod_1_4.asm

dnl  x86-32 mpn_mod_1s_4p, requiring cmov.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2009, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                           cycles/limb
C P5                             ?
C P6 model 0-8,10-12             ?
C P6 model 9  (Banias)           ?
C P6 model 13 (Dothan)           6
C P4 model 0  (Willamette)       ?
C P4 model 1  (?)                ?
C P4 model 2  (Northwood)       15.5
C P4 model 3  (Prescott)         ?
C P4 model 4  (Nocona)           ?
C AMD K6                         ?
C AMD K7                         4.75
C AMD K8                         ?
ASM_START()
	TEXT

C -----------------------------------------------------------------------
C mpn_mod_1s_4p
C
C Reduces the limb vector at up (n limbs) to a single-limb remainder with
C respect to b, folding four source limbs per loop iteration with the
C constants found in cps[].
C
C Stack arguments, as addressed after the four pushes and the 28-byte
C frame set up below:
C	48(%esp)	up	pointer to the least significant limb
C	52(%esp)	n	limb count, also used in place as loop counter
C	56(%esp)	b	divisor used by the final reduction
C	60(%esp)	cps	table: cps[0] = bi, cps[1] = cnt,
C				cps[2..6] = B1modb .. B5modb
C
C Local frame:
C	 4(%esp) = cps[2]	 8(%esp) = cps[3]	12(%esp) = cps[4]
C	16(%esp) = cps[5]	20(%esp) = cps[6]
C
C Register roles in the main loop:
C	esi	 source pointer, moves down 16 bytes per iteration
C	edi:ebp	 running two-limb residue, high:low
C	ecx:ebx	 two-limb accumulator for the next residue
C
C Result in eax.  ebx, esi, edi, ebp are saved and restored.
C
C NOTE(review): the result is shifted right by cnt on exit, so b is
C presumably expected already shifted left by cnt -- confirm against the
C callers.  The final step also shifts by -cnt taken mod 32, which only
C equals 32-cnt when cnt is nonzero -- confirm callers guarantee that.
C -----------------------------------------------------------------------

	ALIGN(16)
PROLOGUE(mpn_mod_1s_4p)
	push	%ebp
	push	%edi
	push	%esi
	push	%ebx
	sub	$28, %esp
	mov	60(%esp), %edi		C cps[]
	mov	8(%edi), %eax		C cps[2]
	mov	12(%edi), %edx		C cps[3]
	mov	16(%edi), %ecx		C cps[4]
	mov	20(%edi), %esi		C cps[5]
	mov	24(%edi), %edi		C cps[6], cps pointer no longer needed
	mov	%eax, 4(%esp)
	mov	%edx, 8(%esp)
	mov	%ecx, 12(%esp)
	mov	%esi, 16(%esp)
	mov	%edi, 20(%esp)
	mov	52(%esp), %eax		C n
	xor	%edi, %edi		C high residue limb starts at 0
	mov	48(%esp), %esi		C up
	lea	-12(%esi,%eax,4), %esi	C esi = &up[n-3]
	and	$3, %eax		C dispatch on n mod 4
	je	L(b0)
	cmp	$2, %eax
	jc	L(b1)			C n mod 4 = 1
	je	L(b2)			C n mod 4 = 2

C n mod 4 = 3: residue = up[n-3] + up[n-2]*cps[2] + up[n-1]*cps[3].
C The last product is left in edx:eax and added at L(m0).
L(b3):	mov	4(%esi), %eax
	mull	4(%esp)
	mov	(%esi), %ebp
	add	%eax, %ebp
	adc	%edx, %edi
	mov	8(%esi), %eax
	mull	8(%esp)
	lea	-12(%esi), %esi
	jmp	L(m0)

C n mod 4 = 0: residue = up[n-4] + up[n-3]*cps[2] + up[n-2]*cps[3]
C + up[n-1]*cps[4].  The last product is added at L(m0).
L(b0):	mov	(%esi), %eax
	mull	4(%esp)
	mov	-4(%esi), %ebp
	add	%eax, %ebp
	adc	%edx, %edi
	mov	4(%esi), %eax
	mull	8(%esp)
	add	%eax, %ebp
	adc	%edx, %edi
	mov	8(%esi), %eax
	mull	12(%esp)
	lea	-16(%esi), %esi
	jmp	L(m0)

C n mod 4 = 1: residue is just the top limb, high part stays 0.
L(b1):	mov	8(%esi), %ebp
	lea	-4(%esi), %esi
	jmp	L(m1)

C n mod 4 = 2: residue is the top two limbs taken as high:low.
L(b2):	mov	8(%esi), %edi
	mov	4(%esi), %ebp
	lea	-8(%esi), %esi
	jmp	L(m1)

C Main loop.  Each pass forms
C	-4(%esi) + (%esi)*cps[2] + 4(%esi)*cps[3] + 8(%esi)*cps[4]
C	+ low residue * cps[5] + high residue * cps[6]
C as the new two-limb residue in edi:ebp.
	ALIGN(16)
L(top):	mov	(%esi), %eax
	mull	4(%esp)
	mov	-4(%esi), %ebx
	xor	%ecx, %ecx
	add	%eax, %ebx
	adc	%edx, %ecx
	mov	4(%esi), %eax
	mull	8(%esp)
	add	%eax, %ebx
	adc	%edx, %ecx
	mov	8(%esi), %eax
	mull	12(%esp)
	add	%eax, %ebx
	adc	%edx, %ecx
	lea	-16(%esi), %esi
	mov	16(%esp), %eax
	mul	%ebp			C low residue * cps[5]
	add	%eax, %ebx
	adc	%edx, %ecx
	mov	20(%esp), %eax
	mul	%edi			C high residue * cps[6], added at L(m0)
	mov	%ebx, %ebp
	mov	%ecx, %edi
L(m0):	add	%eax, %ebp		C add the pending product in edx:eax
	adc	%edx, %edi
L(m1):	subl	$4, 52(%esp)		C four limbs consumed
	ja	L(top)			C loop while the count stays above zero

C Fold the two-limb residue once more with cps[2], then reduce it by b
C using the precomputed inverse cps[0] and shift count cps[1].
L(end):	mov	4(%esp), %eax
	mul	%edi			C high residue * cps[2]
	mov	60(%esp), %edi		C cps
	add	%eax, %ebp		C ebp = low limb
	adc	$0, %edx		C edx = high limb
	mov	4(%edi), %ecx		C cnt
	mov	%edx, %esi
	mov	%ebp, %eax
	sal	%cl, %esi		C high << cnt
	mov	%ecx, %ebx		C keep cnt
	neg	%ecx			C count -cnt, i.e. 32-cnt after masking
	shr	%cl, %eax		C low >> (32-cnt)
	or	%esi, %eax		C nh = top limb of the shifted residue
	lea	1(%eax), %esi		C nh + 1
	mull	(%edi)			C edx:eax = nh * bi
	mov	%ebx, %ecx		C cnt again
	mov	%eax, %ebx		C ql
	mov	%ebp, %eax
	mov	56(%esp), %ebp		C b
	sal	%cl, %eax		C nl = low << cnt
	add	%eax, %ebx		C ql += nl
	adc	%esi, %edx		C qh += nh + 1 + carry
	imul	%ebp, %edx		C qh * b, low limb only
	sub	%edx, %eax		C r = nl - qh * b
	lea	(%eax,%ebp), %edx	C r + b
	cmp	%eax, %ebx		C carry set when ql < r
	cmovc(	%edx, %eax)		C then take r + b
	mov	%eax, %edx
	sub	%ebp, %eax		C r - b
	cmovc(	%edx, %eax)		C borrow: r was already below b, keep it
	add	$28, %esp
	pop	%ebx
	pop	%esi
	pop	%edi
	pop	%ebp
	shr	%cl, %eax		C undo the cnt scaling of the remainder
	ret
EPILOGUE()

C -----------------------------------------------------------------------
C mpn_mod_1s_4p_cps
C
C Fills the seven-entry table used by mpn_mod_1s_4p.
C
C Stack arguments, as addressed after the four pushes below:
C	20(%esp)	cps	destination table
C	24(%esp)	b	divisor; must be nonzero, since bsr leaves its
C				destination undefined for a zero source
C
C Stores:
C	cps[0] = bi	quotient of the two-limb value (~b):0xffffffff
C			divided by the normalized b
C	cps[1] = cnt	number of leading zero bits of b
C	cps[2..6]	B1modb .. B5modb, each shifted right by cnt
C
C ebx, esi, edi, ebp are saved and restored.
C -----------------------------------------------------------------------

	ALIGN(16)
PROLOGUE(mpn_mod_1s_4p_cps)
C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
	push	%ebp
	push	%edi
	push	%esi
	push	%ebx
	mov	20(%esp), %ebp		C FIXME: avoid bp for 0-idx
	mov	24(%esp), %ebx
	bsr	%ebx, %ecx		C index of the highest set bit of b
	xor	$31, %ecx		C cnt = 31 - index
	sal	%cl, %ebx		C b << cnt
	mov	%ebx, %edx
	not	%edx			C high limb of dividend = ~b
	mov	$-1, %eax		C low limb of dividend = all ones
	div	%ebx			C eax = bi
	xor	%edi, %edi
	sub	%ebx, %edi		C edi = -b
	mov	$1, %esi
	mov	%eax, (%ebp)		C store bi
	mov	%ecx, 4(%ebp)		C store cnt
	shld	%cl, %eax, %esi		C esi = (1 << cnt) | (bi >> (32-cnt))
	imul	%edi, %esi		C esi = -b * esi, still scaled by cnt
	mov	%eax, %edi		C edi = bi from here on
	mul	%esi			C edx:eax = bi * B1modb (scaled)

C Each of the following steps stores the previous value shifted right by
C cnt, then derives the next one from the product in edx:eax:
C r = ~(edx + previous) * b, plus b when eax < r.
	add	%esi, %edx
	shr	%cl, %esi
	mov	%esi, 8(%ebp)		C store B1modb

	not	%edx
	imul	%ebx, %edx
	lea	(%edx,%ebx), %esi	C r + b
	cmp	%edx, %eax
	cmovnc(	%edx, %esi)		C eax >= r: take r without the addback
	mov	%edi, %eax
	mul	%esi

	add	%esi, %edx
	shr	%cl, %esi
	mov	%esi, 12(%ebp)		C store B2modb

	not	%edx
	imul	%ebx, %edx
	lea	(%edx,%ebx), %esi
	cmp	%edx, %eax
	cmovnc(	%edx, %esi)
	mov	%edi, %eax
	mul	%esi

	add	%esi, %edx
	shr	%cl, %esi
	mov	%esi, 16(%ebp)		C store B3modb

	not	%edx
	imul	%ebx, %edx
	lea	(%edx,%ebx), %esi
	cmp	%edx, %eax
	cmovnc(	%edx, %esi)
	mov	%edi, %eax
	mul	%esi

	add	%esi, %edx
	shr	%cl, %esi
	mov	%esi, 20(%ebp)		C store B4modb

C Last step: b is not needed afterwards, so the result is built in ebx.
	not	%edx
	imul	%ebx, %edx
	add	%edx, %ebx		C r + b
	cmp	%edx, %eax
	cmovnc(	%edx, %ebx)

	shr	%cl, %ebx
	mov	%ebx, 24(%ebp)		C store B5modb

	pop	%ebx
	pop	%esi
	pop	%edi
	pop	%ebp
	ret
EPILOGUE()