dnl  x86-32 mpn_addmul_1 and mpn_submul_1 optimised for Intel Atom.

dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                           cycles/limb
C                           cycles/limb
C P5                             -
C P6 model 0-8,10-12             -
C P6 model 9  (Banias)
C P6 model 13 (Dothan)
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom                     8
C AMD K6
C AMD K7                         -
C AMD K8
C AMD K10

C Interface, as read from the argument loads below (32-bit x86, arguments on
C the stack, result in %eax):
C
C   func_1  (rp, up, n, v)           carry-in limb is zero
C   func_1c (rp, up, n, v, carry)    carry-in limb supplied by the caller
C
C Each of the n limbs at up is multiplied by v with pmuludq.  The low product
C limb is combined with the running carry limb and then added to (addmul) or
C subtracted from (submul) the matching limb at rp, in place.  The last carry
C limb is returned in %eax.
C
C Register roles:
C   rp = %edi     destination pointer, biased by the feed-in code
C   up = %esi     source pointer, biased by the feed-in code
C   n  = %ecx     limb count on entry, afterwards the number of loop passes
C   %edx          carry limb: high product limb plus pending carry bits
C   %eax, %ebx    low product limbs on their way to memory
C   %mm0, %mm1    64-bit products in flight
C   %mm7          the multiplier v
C
C %edi, %esi and %ebx are saved on entry and restored before returning, and
C emms is executed before the return.
C
C NOTE(review): n = 0 is not special-cased, the feed-in code reads the first
C source limb unconditionally.  Presumably callers guarantee n >= 1; confirm
C against the mpn interface documentation.

define(`rp', `%edi')
define(`up', `%esi')
define(`n',  `%ecx')

ifdef(`OPERATION_addmul_1',`
        define(ADDSUB,  add)
        define(func_1,  mpn_addmul_1)
        define(func_1c, mpn_addmul_1c)')
ifdef(`OPERATION_submul_1',`
        define(ADDSUB,  sub)
        define(func_1,  mpn_submul_1)
        define(func_1c, mpn_submul_1c)')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)

        TEXT
        ALIGN(16)
PROLOGUE(func_1)
        xor     %edx, %edx              C carry-in limb = 0

C Shared entry, also reached from func_1c with the carry-in already in %edx.
C Three pushes plus the return address put the first argument at 16(%esp).
L(ent):
        push    %edi
        push    %esi
        push    %ebx
        mov     16(%esp), rp
        mov     20(%esp), up
        mov     24(%esp), n
        movd    28(%esp), %mm7

C Feed-in: choose the entry point into the 4-way unrolled loop from n mod 4.
C test $1 separates odd from even n.  shr $2 leaves n/4 in n, bit 1 of the
C original n in the carry flag, and sets ZF when n/4 is zero.
        test    $1, n
        jz      L(fi0or2)
        movd    (up), %mm0
        pmuludq %mm7, %mm0
        shr     $2, n
        jnc     L(fi1)

C n mod 4 = 3
L(fi3):
        lea     -8(up), up
        lea     -8(rp), rp
        movd    12(up), %mm1
        movd    %mm0, %ebx
        pmuludq %mm7, %mm1
        add     $1, n                   C increment and clear carry
        jmp     L(lo3)

C n mod 4 = 1.  movd leaves the flags alone, so jz still tests the shr
C result: a zero pass count means n was 1 and only the wind-down is needed.
L(fi1):
        movd    %mm0, %ebx
        jz      L(wd1)
        movd    4(up), %mm1
        pmuludq %mm7, %mm1
        jmp     L(lo1)

C n even.  Falling through jc means n mod 4 = 0, entered at L(lo0) with the
C carry flag clear.
L(fi0or2):
        movd    (up), %mm1
        pmuludq %mm7, %mm1
        shr     $2, n
        movd    4(up), %mm0
        jc      L(fi2)
        lea     -4(up), up
        lea     -4(rp), rp
        movd    %mm1, %eax
        pmuludq %mm7, %mm0
        jmp     L(lo0)

C n mod 4 = 2.  The limb already loaded into %mm0 is multiplied at L(lo2).
L(fi2):
        lea     4(up), up
        add     $1, n                   C increment and clear carry
        movd    %mm1, %eax
        lea     -12(rp), rp
        jmp     L(lo2)

C Main loop, four limbs per pass.  The carry flag is live across the whole
C body: each adc $0 folds the carry of the preceding adc into the freshly
C extracted high limb, and the carry or borrow produced by ADDSUB is picked
C up by the next adc.  movd, psrlq, pmuludq and lea do not write the flags.

C       ALIGN(16)                       C alignment seems irrelevant
L(top):
        movd    4(up), %mm1
        adc     $0, %edx
        ADDSUB  %eax, 12(rp)
        movd    %mm0, %ebx
        pmuludq %mm7, %mm1
        lea     16(rp), rp
L(lo1):
        psrlq   $32, %mm0
        adc     %edx, %ebx
        movd    %mm0, %edx
        movd    %mm1, %eax
        movd    8(up), %mm0
        pmuludq %mm7, %mm0
        adc     $0, %edx
        ADDSUB  %ebx, (rp)
L(lo0):
        psrlq   $32, %mm1
        adc     %edx, %eax
        movd    %mm1, %edx
        movd    %mm0, %ebx
        movd    12(up), %mm1
        pmuludq %mm7, %mm1
        adc     $0, %edx
        ADDSUB  %eax, 4(rp)
L(lo3):
        psrlq   $32, %mm0
        adc     %edx, %ebx
        movd    %mm0, %edx
        movd    %mm1, %eax
        lea     16(up), up
        movd    (up), %mm0
        adc     $0, %edx
        ADDSUB  %ebx, 8(rp)
L(lo2):
        psrlq   $32, %mm1
        adc     %edx, %eax
        movd    %mm1, %edx
        pmuludq %mm7, %mm0
        dec     n                       C dec keeps CF for the adc that follows
        jnz     L(top)

C Wind-down: store the last two limbs and build the return value.  n is
C zero on every path that gets here, so adc n only adds the carry flag.
L(end):
        adc     n, %edx                 C n is zero here
        ADDSUB  %eax, 12(rp)
        movd    %mm0, %ebx
        lea     16(rp), rp
L(wd1):
        psrlq   $32, %mm0
        adc     %edx, %ebx
        movd    %mm0, %eax
        adc     n, %eax
        ADDSUB  %ebx, (rp)
        emms
        adc     n, %eax
        pop     %ebx
        pop     %esi
        pop     %edi
        ret
EPILOGUE()

C Carry-in variant.  Nothing has been pushed yet, so the fifth argument is
C at 20(%esp).
PROLOGUE(func_1c)
        mov     20(%esp), %edx          C carry
        jmp     L(ent)
EPILOGUE()