dnl  Path: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/k7/aorsmul_1.asm

dnl  AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.

dnl  Copyright 1999-2002, 2005, 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C                           cycles/limb
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)           6.5
C P6 model 13 (Dothan)
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C AMD K6
C AMD K7                         3.75
C AMD K8

C TODO
C  * Improve feed-in and wind-down code.  We beat the old code for all n != 1,
C    but lose by 2x for n == 1.
C Pick the flavour being built.  ADDSUB is the instruction that applies each
C product limb to the destination; func is the name of the entry point.

ifdef(`OPERATION_addmul_1',`
	define(`ADDSUB', `add')
	define(`func', `mpn_addmul_1')
')
ifdef(`OPERATION_submul_1',`
	define(`ADDSUB', `sub')
	define(`func', `mpn_submul_1')
')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

C func -- multiply a vector of 32-bit limbs by a single limb and add the
C product to (addmul) or subtract it from (submul) a destination vector,
C in place.
C
C Stack arguments as seen on entry (names follow the GMP manual):
C    4(%esp)  rp   destination vector, read and written
C    8(%esp)  up   source vector, read only
C   12(%esp)  n    number of limbs; must be at least 1.  With n == 0 the
C                  code enters the loop with a zero pass counter, which
C                  wraps on the first decrement.
C   16(%esp)  v    multiplier limb
C
C Return value: %eax = the limb carried (addmul) or borrowed (submul) out
C of the most significant position.
C
C %ebp, %ebx, %esi and %edi are saved in a 16-byte frame and restored
C before returning.
C
C Register roles after the prologue:
C   %edi       destination pointer, biased by the feed-in code
C   %esi       source pointer, biased by the feed-in code
C   %ecx       multiplier
C   %eax       source limb for the next mul, then low half of the product
C   %edx       high half of the product
C   %ebp/%ebx  take turns: one holds the limb about to be applied to the
C              destination, the other collects the limb that follows it
C   28(%esp)   the argument slot of n, reused as the count of loop passes
C
C The loop is unrolled four ways and software pipelined: the limb for the
C next mul is always loaded one step ahead.  n mod 4 selects one of four
C feed-in blocks, each of which multiplies the first limb, biases the
C pointers so the fixed displacements in the loop address the right limbs,
C and jumps to the matching position inside the loop.  The wind-down at
C L(cj2)/L(cj1) handles the last two limbs.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	add	$-16, %esp		C frame for the four saved registers
	mov	%ebp, (%esp)
	mov	%ebx, 4(%esp)
	mov	%esi, 8(%esp)
	mov	%edi, 12(%esp)

	mov	20(%esp), %edi		C destination pointer
	mov	24(%esp), %esi		C source pointer
	mov	28(%esp), %eax		C limb count
	mov	32(%esp), %ecx		C multiplier
	mov	%eax, %ebx
	shr	$2, %eax
	mov	%eax, 28(%esp)		C pass counter = n / 4
	mov	(%esi), %eax		C first source limb; mov leaves flags alone
	and	$3, %ebx		C n mod 4 picks the feed-in block
	jz	L(b0)
	cmp	$2, %ebx
	jz	L(b2)
	jg	L(b3)

C n mod 4 == 1
L(b1):	lea	-4(%esi), %esi
	lea	-4(%edi), %edi
	mul	%ecx
	mov	%eax, %ebx		C low half, applied at 4(%edi)
	mov	%edx, %ebp		C high half
	cmpl	$0, 28(%esp)
	jz	L(cj1)			C n == 1: only the final store remains
	mov	8(%esi), %eax		C second source limb
	jmp	L(1)

C n mod 4 == 2, pointers need no bias
L(b2):	mul	%ecx
	mov	%eax, %ebp		C low half, applied at 0(%edi)
	mov	4(%esi), %eax		C second source limb
	mov	%edx, %ebx		C high half
	cmpl	$0, 28(%esp)
	jne	L(2)
	jmp	L(cj2)			C n == 2: straight to the wind-down

C n mod 4 == 3
L(b3):	lea	-12(%esi), %esi
	lea	-12(%edi), %edi
	mul	%ecx
	mov	%eax, %ebx		C low half, applied at 12(%edi)
	mov	%edx, %ebp		C high half
	mov	16(%esi), %eax		C second source limb
	incl	28(%esp)		C entry at L(3) runs into the decrement
					C below once before any full pass
	jmp	L(3)

C n mod 4 == 0
L(b0):	lea	-8(%esi), %esi
	lea	-8(%edi), %edi
	mul	%ecx
	mov	%eax, %ebp		C low half, applied at 8(%edi)
	mov	12(%esi), %eax		C second source limb
	mov	%edx, %ebx		C high half
	jmp	L(0)

C Main loop, four limbs per pass.  In every step the carry flag produced by
C ADDSUB (carry for add, borrow for sub) is consumed by the adc that
C follows, so nothing in between may change the flags.  That is why the
C accumulators are cleared with mov $0 and not with xor.

	ALIGN(16)
L(top):	lea	16(%edi), %edi		C lea leaves the flags alone
L(2):	mul	%ecx
	ADDSUB	%ebp, 0(%edi)		C apply pending limb
	mov	$0, %ebp
	adc	%eax, %ebx		C previous high + new low + flag
	mov	8(%esi), %eax		C limb for the next mul
	adc	%edx, %ebp		C new high + carry
L(1):	mul	%ecx
	ADDSUB	%ebx, 4(%edi)
	mov	$0, %ebx
	adc	%eax, %ebp
	mov	12(%esi), %eax
	adc	%edx, %ebx
L(0):	mul	%ecx
	ADDSUB	%ebp, 8(%edi)
	mov	$0, %ebp
	adc	%eax, %ebx
	adc	%edx, %ebp
	mov	16(%esi), %eax
L(3):	mul	%ecx
	ADDSUB	%ebx, 12(%edi)
	adc	%eax, %ebp
	mov	20(%esi), %eax		C limb for the first mul of the next
					C pass, or for the wind-down mul
	lea	16(%esi), %esi
	mov	$0, %ebx
	adc	%edx, %ebx
	decl	28(%esp)
	jnz	L(top)

C Wind-down: two limbs are still pending here.  L(end) is reached only by
C falling out of the loop.
L(end):	lea	16(%edi), %edi
L(cj2):	mul	%ecx			C product of the last source limb
	ADDSUB	%ebp, (%edi)
	adc	%eax, %ebx
	adc	$0, %edx
L(cj1):	ADDSUB	%ebx, 4(%edi)		C most significant destination limb
	adc	$0, %edx
	mov	%edx, %eax		C return the carry or borrow limb
	mov	(%esp), %ebp
	mov	4(%esp), %ebx
	mov	8(%esp), %esi
	mov	12(%esp), %edi
	add	$16, %esp
	ret
EPILOGUE()
ASM_END()