dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/aorsmul_1.asm
dnl
dnl  x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a
dnl  limb and add the result to a second limb vector.

dnl  Copyright 1992, 1994, 1997, 1999-2002, 2005 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                           cycles/limb
C P5                            14.75
C P6 model 0-8,10-12             7.5
C P6 model 9  (Banias)           6.7
C P6 model 13 (Dothan)           6.75
C P4 model 0  (Willamette)      24.0
C P4 model 1  (?)               24.0
C P4 model 2  (Northwood)       24.0
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom
C AMD K6                        12.5
C AMD K7                         5.25
C AMD K8
C AMD K10


C Operation selection.  If OPERATION_addmul_1 is defined the destination is
C updated with addl and the function is named mpn_addmul_1; otherwise, if
C OPERATION_submul_1 is defined, subl and mpn_submul_1 are used.  If neither
C is defined m4_error is invoked.

ifdef(`OPERATION_addmul_1',`
      define(M4_inst,        addl)
      define(M4_function_1,  mpn_addmul_1)

',`ifdef(`OPERATION_submul_1',`
      define(M4_inst,        subl)
      define(M4_function_1,  mpn_submul_1)

',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
')')')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)


C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                          mp_limb_t mult);
C
C For each of the size limbs: multiply src[i] by mult, add the running carry
C limb to the low product word, and apply M4_inst (addl or subl) of that word
C to dst[i].  The carry limb left after the last limb is returned in %eax.
C With size equal to zero both loops are skipped and zero is returned.
C
C Arguments are read from the stack through the PARAM_* macros below, which
C are offsets from %esp adjusted by FRAME for the registers pushed on entry.
C
C Register use:
C   %edi        dst pointer
C   %esi        src pointer
C   %ecx        loop counter
C   %ebx        carry limb (cylimb) between limbs and on exit
C   %ebp        second carry limb, alternating with %ebx in the unrolled loop
C   %edx:%eax   product written by mull
C
C %edi, %esi, %ebx and %ebp are pushed on entry and popped before ret.
C
C Registers are zeroed with "movl $0" where an adcl follows, because mov
C leaves the flags untouched and so keeps the pending carry; xorl would
C clear it.

define(PARAM_MULTIPLIER, `FRAME+16(%esp)')
define(PARAM_SIZE,       `FRAME+12(%esp)')
define(PARAM_SRC,        `FRAME+8(%esp)')
define(PARAM_DST,        `FRAME+4(%esp)')

        TEXT
        ALIGN(8)

PROLOGUE(M4_function_1)
deflit(`FRAME',0)

        pushl   %edi
        pushl   %esi
        pushl   %ebx
        pushl   %ebp
deflit(`FRAME',16)              C four 4-byte pushes since entry

        movl    PARAM_DST,%edi
        movl    PARAM_SRC,%esi
        movl    PARAM_SIZE,%ecx

        xorl    %ebx,%ebx       C initial carry limb is zero
        andl    $3,%ecx         C size mod 4 limbs are done one at a time
        jz      L(end0)

C Single-limb loop, run size mod 4 times.
L(oop0):
        movl    (%esi),%eax
        mull    PARAM_MULTIPLIER
        leal    4(%esi),%esi    C advance src
        addl    %ebx,%eax       C low product word + carry limb
        movl    $0,%ebx         C zero without touching the carry flag
        adcl    %ebx,%edx       C fold that carry into the high product word
        M4_inst %eax,(%edi)
        adcl    %edx,%ebx       C propagate carry into cylimb

        leal    4(%edi),%edi    C advance dst
        decl    %ecx
        jnz     L(oop0)

L(end0):
        movl    PARAM_SIZE,%ecx
        shrl    $2,%ecx         C number of four-limb groups left
        jz      L(end)

C Four-way unrolled loop.  Each step starts the next mull before the
C previous result is applied to dst; %ebx and %ebp take turns holding the
C value to apply and the carry limb being built.
        ALIGN(8)
L(oop):
        movl    (%esi),%eax
        mull    PARAM_MULTIPLIER
        addl    %eax,%ebx       C low product word + incoming carry limb
        movl    $0,%ebp
        adcl    %edx,%ebp       C high product word + carry

        movl    4(%esi),%eax
        mull    PARAM_MULTIPLIER
        M4_inst %ebx,(%edi)
        adcl    %eax,%ebp       C new lo + cylimb
        movl    $0,%ebx
        adcl    %edx,%ebx

        movl    8(%esi),%eax
        mull    PARAM_MULTIPLIER
        M4_inst %ebp,4(%edi)
        adcl    %eax,%ebx       C new lo + cylimb
        movl    $0,%ebp
        adcl    %edx,%ebp

        movl    12(%esi),%eax
        mull    PARAM_MULTIPLIER
        M4_inst %ebx,8(%edi)
        adcl    %eax,%ebp       C new lo + cylimb
        movl    $0,%ebx
        adcl    %edx,%ebx

        M4_inst %ebp,12(%edi)
        adcl    $0,%ebx         C propagate carry into cylimb

        leal    16(%esi),%esi   C advance src by four limbs
        leal    16(%edi),%edi   C advance dst by four limbs
        decl    %ecx
        jnz     L(oop)

L(end):
        movl    %ebx,%eax       C return the final carry limb

        popl    %ebp
        popl    %ebx
        popl    %esi
        popl    %edi
        ret

EPILOGUE()