dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/k7/mul_1.asm

dnl  AMD K7 mpn_mul_1.

dnl  Copyright 1999-2002, 2005, 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C			    cycles/limb
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)
C P6 model 13 (Dothan)
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C AMD K6
C AMD K7			 3.25
C AMD K8

C TODO
C  * Improve feed-in and wind-down code.  We beat the old code for all n != 1,
C    but we might be able to do even better.
C  * The feed-in code for mul_1c is crude.
C ---------------------------------------------------------------------------
C mpn_mul_1c / mpn_mul_1 -- multiply a limb vector by a single limb.
C
C Syntax: 32-bit x86, AT&T operand order (op src, dst), run through m4.
C
C Both entry points read their arguments from the stack.  After the 16-byte
C register save area has been allocated the frame looks like this:
C
C	 0(%esp)  saved %ebp
C	 4(%esp)  saved %ebx
C	 8(%esp)  saved %esi
C	12(%esp)  saved %edi
C	16(%esp)  return address
C	20(%esp)  destination pointer		-> %edi
C	24(%esp)  source pointer		-> %esi
C	28(%esp)  limb count; the slot is then reused as the loop counter
C	32(%esp)  multiplier limb		-> %ecx
C	36(%esp)  carry-in limb (read by mpn_mul_1c only)
C
C Result: the product limbs are stored through %edi and the final high limb
C is returned in %eax.  %ebp, %ebx, %esi and %edi are restored before return.
C
C Register roles in the main loop:
C	%eax	next source limb going into mul, then low half of the product
C	%edx	high half of the product
C	%ebx	\ alternate as the result limb being assembled and the
C	%ebp	/ one waiting to be stored
C
C The loop handles four limbs per pass and has four entry points, L(0) to
C L(3).  The feed-in code picks one from (count mod 4) and biases %esi and
C %edi so that the fixed displacements used inside the loop address the
C right limbs.
C
C Carry handling: registers are cleared with "mov $0" and pointers advanced
C with "lea" because neither touches the carry flag, which must survive
C from each "add" to the following "adc".
C
C NOTE(review): a count of zero takes the multiple-of-4 path with a loop
C counter of zero, which then wraps on the first decl.  Callers presumably
C guarantee count >= 1 -- confirm against the mpn interface contract.
C ---------------------------------------------------------------------------

	ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_mul_1c)
	add	$-16, %esp		C room for four callee-saved registers
	mov	%ebp, (%esp)
	mov	%ebx, 4(%esp)
	mov	%esi, 8(%esp)
	mov	%edi, 12(%esp)

	mov	20(%esp), %edi		C destination pointer
	mov	24(%esp), %esi		C source pointer
	mov	28(%esp), %ebp		C limb count
	mov	32(%esp), %ecx		C multiplier
	mov	%ebp, %ebx
	shr	$2, %ebp		C number of whole 4-limb groups
	mov	%ebp, 28(%esp)		C count slot becomes the loop counter
	mov	(%esi), %eax		C first source limb
	and	$3, %ebx		C count mod 4 selects the feed-in path
	jz	L(c0)
	cmp	$2, %ebx
	mov	36(%esp), %ebx		C carry-in; mov leaves the cmp flags alone
	jz	L(c2)
	jg	L(c3)

C count mod 4 == 1
L(c1):	lea	-4(%edi), %edi
	mul	%ecx
	test	%ebp, %ebp
	jnz	L(c1m)			C more than one limb in total
	add	%ebx, %eax		C single limb: low half + carry-in
	mov	%eax, 4(%edi)
	mov	%edx, %eax
	adc	%ebp, %eax		C %ebp is zero here, so this adds only CF
	jmp	L(rt)
L(c1m):	add	%eax, %ebx		C first result limb = low half + carry-in
	mov	$0, %ebp
	adc	%edx, %ebp
	mov	4(%esi), %eax
	jmp	L(1)

C count mod 4 == 2
L(c2):	lea	4(%esi), %esi
	mul	%ecx
	test	%ebp, %ebp		C flags consumed by the jnz below
	mov	%ebx, %ebp		C carry-in moves to the limb accumulator
	jnz	L(c2m)			C more than two limbs in total
	add	%eax, %ebp
	mov	$0, %ebx
	adc	%edx, %ebx
	mov	(%esi), %eax
	jmp	L(cj2)			C exactly two limbs: straight to wind-down
L(c2m):	add	%eax, %ebp
	mov	$0, %ebx
	adc	%edx, %ebx
	mov	(%esi), %eax
	jmp	L(2)

C count mod 4 == 3
L(c3):	lea	8(%esi), %esi
	lea	-12(%edi), %edi
	mul	%ecx
	add	%eax, %ebx
	mov	$0, %ebp
	adc	%edx, %ebp
	mov	-4(%esi), %eax
	incl	28(%esp)		C L(3) reaches the decl before a full pass
	jmp	L(3)

C count mod 4 == 0
L(c0):	mov	36(%esp), %ebx		C carry-in
	lea	-4(%esi), %esi
	lea	-8(%edi), %edi
	mul	%ecx
	mov	%ebx, %ebp
	add	%eax, %ebp
	mov	$0, %ebx
	adc	%edx, %ebx
	mov	8(%esi), %eax
	jmp	L(0)

EPILOGUE()

	ALIGN(16)
PROLOGUE(mpn_mul_1)
	add	$-16, %esp		C room for four callee-saved registers
	mov	%ebp, (%esp)
	mov	%ebx, 4(%esp)
	mov	%esi, 8(%esp)
	mov	%edi, 12(%esp)

	mov	20(%esp), %edi		C destination pointer
	mov	24(%esp), %esi		C source pointer
	mov	28(%esp), %ebp		C limb count
	mov	32(%esp), %ecx		C multiplier
	mov	%ebp, %ebx
	shr	$2, %ebp		C number of whole 4-limb groups
	mov	%ebp, 28(%esp)		C count slot becomes the loop counter
	mov	(%esi), %eax		C first source limb
	and	$3, %ebx		C count mod 4 selects the feed-in path
	jz	L(b0)
	cmp	$2, %ebx
	jz	L(b2)
	jg	L(b3)

C count mod 4 == 1
L(b1):	lea	-4(%edi), %edi
	mul	%ecx
	test	%ebp, %ebp
	jnz	L(gt1)			C more than one limb in total
	mov	%eax, 4(%edi)		C single limb: store low, return high
	mov	%edx, %eax
	jmp	L(rt)
L(gt1):	mov	%eax, %ebx
	mov	%edx, %ebp
	mov	4(%esi), %eax
	jmp	L(1)

C count mod 4 == 2
L(b2):	lea	4(%esi), %esi
	mul	%ecx
	test	%ebp, %ebp		C the movs below leave these flags intact
	mov	%eax, %ebp
	mov	%edx, %ebx
	mov	(%esi), %eax
	jnz	L(2)			C more than two limbs in total
	jmp	L(cj2)			C exactly two limbs: straight to wind-down

C count mod 4 == 3
L(b3):	lea	8(%esi), %esi
	lea	-12(%edi), %edi
	mul	%ecx
	mov	%eax, %ebx
	mov	%edx, %ebp
	mov	-4(%esi), %eax
	incl	28(%esp)		C L(3) reaches the decl before a full pass
	jmp	L(3)

C count mod 4 == 0
L(b0):	lea	-4(%esi), %esi
	lea	-8(%edi), %edi
	mul	%ecx
	mov	%eax, %ebp
	mov	%edx, %ebx
	mov	8(%esi), %eax
	jmp	L(0)

C Main loop, shared by both entry points: four limbs per pass.
	ALIGN(16)
L(top):	mov	$0, %ebx
	adc	%edx, %ebx		C high half of previous product + carry
L(2):	mul	%ecx
	add	%eax, %ebx
	mov	%ebp, 0(%edi)
	mov	4(%esi), %eax
	mov	$0, %ebp
	adc	%edx, %ebp
L(1):	mul	%ecx
	add	%eax, %ebp
	mov	8(%esi), %eax
	mov	%ebx, 4(%edi)
	mov	$0, %ebx
	adc	%edx, %ebx
L(0):	mov	%ebp, 8(%edi)
	mul	%ecx
	add	%eax, %ebx
	mov	12(%esi), %eax
	lea	16(%esi), %esi		C advance source; lea keeps CF
	mov	$0, %ebp
	adc	%edx, %ebp
L(3):	mov	%ebx, 12(%edi)
	mul	%ecx
	lea	16(%edi), %edi		C advance destination
	add	%eax, %ebp
	decl	28(%esp)		C sets ZF for jnz, leaves CF for the adc
	mov	0(%esi), %eax
	jnz	L(top)

C Wind-down: finish the last two limbs and form the return value.
L(end):	mov	$0, %ebx
	adc	%edx, %ebx
L(cj2):	mul	%ecx
	add	%eax, %ebx
	mov	%ebp, (%edi)
L(cj1):	mov	%ebx, 4(%edi)
	adc	$0, %edx		C fold the last carry into the high limb
	mov	%edx, %eax		C return value

C Common exit: restore the callee-saved registers and release the frame.
L(rt):	mov	(%esp), %ebp
	mov	4(%esp), %ebx
	mov	8(%esp), %esi
	mov	12(%esp), %edi
	add	$16, %esp
	ret
EPILOGUE()
ASM_END()