dnl  Origin: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/aors_n.asm
dnl
dnl  x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1992, 1994-1996, 1999-2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')


C     cycles/limb
C P5     3.375
C P6     3.125
C K6     3.5
C K7     2.25
C P4     8.75


ifdef(`OPERATION_add_n',`
        define(M4_inst,        adcl)
        define(M4_function_n,  mpn_add_n)
        define(M4_function_nc, mpn_add_nc)

',`ifdef(`OPERATION_sub_n',`
        define(M4_inst,        sbbl)
        define(M4_function_n,  mpn_sub_n)
        define(M4_function_nc, mpn_sub_nc)

',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
')')')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)


C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                           mp_size_t size, mp_limb_t carry);
C
C Syntax is AT&T, 32-bit x86, with all arguments read from the stack.
C
C Both entry points share the 8-way unrolled loop at L(oop).  Each group in
C the loop loads a limb from src1, applies M4_inst with the matching limb of
C src2, and stores the result to dst.  The value returned in eax is the
C final carry flag as 0 or 1.
C
C Register use:
C   edi   dst pointer
C   esi   src1 pointer
C   edx   src2 pointer
C   ecx   number of passes through L(oop) still to run
C   eax   limb scratch inside the loop; before the loop it holds the
C         skip count and then the computed entry address
C
C Entering the loop part way:
C   k = (-size) & 7 is the number of limb groups to skip in the first
C   pass.  When k is non-zero the three pointers are moved back by 4*k
C   bytes, ecx is bumped by one, and control jumps to L(oop)-3+9*k.
C   That address is the start of group k provided the first group, which
C   has no displacement, assembles to 6 bytes and every later group to
C   9 bytes.  The code therefore depends on the assembler picking the
C   shortest encodings, and the instructions inside the loop must not be
C   changed or reordered without redoing this arithmetic.
C
C Carry flag:
C   The loop chains CF from group to group and from pass to pass, so the
C   pointer updates use leal and the counter uses decl, neither of which
C   writes CF.
C
C A size of 0 is not handled: ecx is 0 on entry to the loop and the
C decl/jnz pair would only stop after ecx wraps all the way round.

defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SRC1, 8)
defframe(PARAM_DST,  4)

        TEXT
        ALIGN(8)

C M4_function_nc: same as M4_function_n but bit 0 of the carry parameter
C is used as the incoming carry.  This entry point has no ret of its own;
C it always transfers into code that lives inside M4_function_n.

PROLOGUE(M4_function_nc)
deflit(`FRAME',0)

        pushl   %edi                    FRAME_pushl()
        pushl   %esi                    FRAME_pushl()

        movl    PARAM_DST,%edi
        movl    PARAM_SRC1,%esi
        movl    PARAM_SRC2,%edx
        movl    PARAM_SIZE,%ecx

        movl    %ecx,%eax
        shrl    $3,%ecx                 C compute count for unrolled loop
        negl    %eax
        andl    $7,%eax                 C get index where to start loop
        jz      L(oopgo)                C necessary special case for 0
        incl    %ecx                    C adjust loop count
        shll    $2,%eax                 C adjustment for pointers...
        subl    %eax,%edi               C ... since they are offset ...
        subl    %eax,%esi               C ... by a constant when we ...
        subl    %eax,%edx               C ... enter the loop
        shrl    $2,%eax                 C restore previous value

ifdef(`PIC',`
        C Calculate start address in loop for PIC.  Due to limitations in
        C old gas, L(oop)-L(0a)-3 cannot be put into the leal.
        C The call pushes the address of L(0a), which is read back from the
        C stack as the base and then dropped again by the final addl.
        call    L(0a)
L(0a):  leal    (%eax,%eax,8),%eax
        addl    (%esp),%eax
        addl    $L(oop)-L(0a)-3,%eax
        addl    $4,%esp
',`
        C Calculate start address in loop for non-PIC.
        leal    L(oop)-3(%eax,%eax,8),%eax
')

        C These lines initialize carry from the 5th parameter.  Should be
        C possible to simplify.
        C ebp is only borrowed as a scratch register and is restored at once.
        C The popl and the jmp leave CF as set by the shrl.
        pushl   %ebp                    FRAME_pushl()
        movl    PARAM_CARRY,%ebp
        shrl    %ebp                    C shift bit 0 into carry
        popl    %ebp                    FRAME_popl()

        jmp     *%eax                   C jump into loop

EPILOGUE()


        ALIGN(16)

C M4_function_n: entry point with no incoming carry.  CF must be clear when
C the loop is reached:
C   - on the jz path the andl has just cleared it;
C   - otherwise the shrl by 2 shifts out a zero bit, because the preceding
C     shll by 2 left the low two bits of eax zero.  The non-PIC leal does
C     not write flags.  The PIC addl instructions do rewrite CF, and leave
C     it clear as long as none of those additions wraps around 32 bits.

PROLOGUE(M4_function_n)
deflit(`FRAME',0)

        pushl   %edi                    FRAME_pushl()
        pushl   %esi                    FRAME_pushl()

        movl    PARAM_DST,%edi
        movl    PARAM_SRC1,%esi
        movl    PARAM_SRC2,%edx
        movl    PARAM_SIZE,%ecx

        movl    %ecx,%eax
        shrl    $3,%ecx                 C compute count for unrolled loop
        negl    %eax
        andl    $7,%eax                 C get index where to start loop
        jz      L(oop)                  C necessary special case for 0
        incl    %ecx                    C adjust loop count
        shll    $2,%eax                 C adjustment for pointers...
        subl    %eax,%edi               C ... since they are offset ...
        subl    %eax,%esi               C ... by a constant when we ...
        subl    %eax,%edx               C ... enter the loop
        shrl    $2,%eax                 C restore previous value

ifdef(`PIC',`
        C Calculate start address in loop for PIC.  Due to limitations in
        C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
        call    L(0b)
L(0b):  leal    (%eax,%eax,8),%eax
        addl    (%esp),%eax
        addl    $L(oop)-L(0b)-3,%eax
        addl    $4,%esp
',`
        C Calculate start address in loop for non-PIC.
        leal    L(oop)-3(%eax,%eax,8),%eax
')
        jmp     *%eax                   C jump into loop

C Reached only from M4_function_nc when size is a multiple of 8: load the
C incoming carry into CF and fall through into the top of the loop.
L(oopgo):
        pushl   %ebp                    FRAME_pushl()
        movl    PARAM_CARRY,%ebp
        shrl    %ebp                    C shift bit 0 into carry
        popl    %ebp                    FRAME_popl()

        ALIGN(16)
L(oop): movl    (%esi),%eax
        M4_inst (%edx),%eax
        movl    %eax,(%edi)
        movl    4(%esi),%eax
        M4_inst 4(%edx),%eax
        movl    %eax,4(%edi)
        movl    8(%esi),%eax
        M4_inst 8(%edx),%eax
        movl    %eax,8(%edi)
        movl    12(%esi),%eax
        M4_inst 12(%edx),%eax
        movl    %eax,12(%edi)
        movl    16(%esi),%eax
        M4_inst 16(%edx),%eax
        movl    %eax,16(%edi)
        movl    20(%esi),%eax
        M4_inst 20(%edx),%eax
        movl    %eax,20(%edi)
        movl    24(%esi),%eax
        M4_inst 24(%edx),%eax
        movl    %eax,24(%edi)
        movl    28(%esi),%eax
        M4_inst 28(%edx),%eax
        movl    %eax,28(%edi)
        leal    32(%edi),%edi           C advance by 8 limbs, CF untouched
        leal    32(%esi),%esi
        leal    32(%edx),%edx
        decl    %ecx                    C decl leaves CF alone for next pass
        jnz     L(oop)

        sbbl    %eax,%eax               C eax = 0 or -1 from final CF
        negl    %eax                    C return value 0 or 1

        popl    %esi
        popl    %edi
        ret

EPILOGUE()