github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/mul_2.asm (about) 1 dnl Intel Pentium mpn_mul_2 -- mpn by 2-limb multiplication. 2 3 dnl Copyright 2001, 2002 Free Software Foundation, Inc. 4 5 dnl This file is part of the GNU MP Library. 6 dnl 7 dnl The GNU MP Library is free software; you can redistribute it and/or modify 8 dnl it under the terms of either: 9 dnl 10 dnl * the GNU Lesser General Public License as published by the Free 11 dnl Software Foundation; either version 3 of the License, or (at your 12 dnl option) any later version. 13 dnl 14 dnl or 15 dnl 16 dnl * the GNU General Public License as published by the Free Software 17 dnl Foundation; either version 2 of the License, or (at your option) any 18 dnl later version. 19 dnl 20 dnl or both in parallel, as here. 21 dnl 22 dnl The GNU MP Library is distributed in the hope that it will be useful, but 23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 dnl for more details. 26 dnl 27 dnl You should have received copies of the GNU General Public License and the 28 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29 dnl see https://www.gnu.org/licenses/. 30 31 include(`../config.m4') 32 33 34 C P5: 24.0 cycles/limb 35 36 37 C mp_limb_t mpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size, 38 C mp_srcptr mult); 39 C 40 C At 24 c/l this is only 2 cycles faster than a separate mul_1 and addmul_1, 41 C but has the advantage of making just one pass over the operands. 42 C 43 C There's not enough registers to use PARAM_MULT directly, so the multiplier 44 C limbs are transferred to local variables on the stack. 
C mpn_mul_2 -- multiply the size-limb operand at src by the two-limb
C multiplier at mult.
C
C Inputs (read from the stack through the PARAM_* frame definitions below):
C   PARAM_DST   destination limb pointer
C   PARAM_SRC   source limb pointer
C   PARAM_SIZE  number of source limbs; src[0] is read unconditionally, so
C               the routine assumes size >= 1
C   PARAM_MULT  pointer to the two multiplier limbs, low limb first
C
C Outputs:
C   dst[0] .. dst[size]  receive the low size+1 limbs of the product
C   eax                  returns the most significant limb of the product
C
C Registers: esi, edi, ebx and ebp are saved on entry and restored before
C returning; eax, ecx and edx are overwritten.
C
C src[i] is always loaded (twice) before dst[i] is stored, which is what lets
C the routine work when src and dst are the same pointer.

defframe(PARAM_MULT, 16)
defframe(PARAM_SIZE, 12)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)

dnl Re-use parameter space: once src and dst have been copied into esi and
dnl edi, their stack slots are overwritten with the two multiplier limbs.
define(VAR_MULT_LOW, `PARAM_SRC')
define(VAR_MULT_HIGH,`PARAM_DST')

	TEXT
	ALIGN(8)
PROLOGUE(mpn_mul_2)
deflit(`FRAME',0)

	C NOTE(review): FRAME_pushl() presumably records each push so that the
	C PARAM_* and VAR_* operands keep addressing the same stack slots after
	C esp moves -- it is defined outside this file, confirm in the shared
	C m4 definitions.
	pushl	%esi		FRAME_pushl()
	pushl	%edi		FRAME_pushl()

	movl	PARAM_SRC, %esi
	movl	PARAM_DST, %edi

	movl	PARAM_MULT, %eax
	movl	PARAM_SIZE, %ecx

	movl	4(%eax), %edx		C mult high
	movl	(%eax), %eax		C mult low

	C Spill both multiplier limbs into the (now dead) src/dst argument
	C slots; eax keeps mult low for the first multiply.
	movl	%eax, VAR_MULT_LOW
	movl	%edx, VAR_MULT_HIGH

	pushl	%ebx		FRAME_pushl()
	pushl	%ebp		FRAME_pushl()

	mull	(%esi)			C src[0] * mult[0]

	movl	%eax, %ebp		C in case src==dst: hold the low limb
					C until src[0] has been reloaded
	movl	(%esi), %eax		C src[0]

	movl	%ebp, (%edi)		C dst[0]
	movl	%edx, %ebx		C initial low carry

	xorl	%ebp, %ebp		C initial high carry
	leal	(%edi,%ecx,4), %edi	C dst end, &dst[size]

	mull	VAR_MULT_HIGH		C src[0] * mult[1]

	subl	$2, %ecx		C size-2
	js	L(done)			C size==1, no further source limbs

	leal	8(%esi,%ecx,4), %esi	C &src[size]
	xorl	$-1, %ecx		C -(size-1)



	C Each iteration handles one source limb src[i], i = 1 .. size-1,
	C addressed as (%esi,%ecx,4) with ecx counting up from -(size-1) to 0.
	C On entry edx:eax holds src[i-1] * mult[1], still to be accumulated.
L(top):
	C eax	low prod
	C ebx	low carry
	C ecx	counter, negative
	C edx	high prod
	C esi	src end
	C edi	dst end
	C ebp	high carry (0 or -1)

	andl	$1, %ebp		C 1 or 0
	addl	%eax, %ebx		C low carry += low half of previous
					C src * mult[1] product

	adcl	%edx, %ebp		C high half of that product plus both
					C carries
	C NOTE(review): ASSERT is defined outside this file; it appears to
	C check, in debugging builds, that the addition above left carry clear.
	ASSERT(nc)
	movl	(%esi,%ecx,4), %eax	C src[i]

	mull	VAR_MULT_LOW		C src[i] * mult[0]

	addl	%eax, %ebx		C low carry
	movl	(%esi,%ecx,4), %eax	C reload src[i] before dst[i] is
					C written, in case src==dst

	adcl	%ebp, %edx		C high carry
	movl	%ebx, (%edi,%ecx,4)	C dst[i]

	sbbl	%ebp, %ebp		C new high carry, -1 or 0
	movl	%edx, %ebx		C new low carry

	mull	VAR_MULT_HIGH		C src[i] * mult[1], consumed by the
					C next iteration or by L(done)

	incl	%ecx
	jnz	L(top)


	C Fold the final src * mult[1] product (edx:eax) into the pending
	C carries to produce the top two limbs of the result.
L(done):
	andl	$1, %ebp		C 1 or 0
	addl	%ebx, %eax

	adcl	%ebp, %edx
	ASSERT(nc)
	movl	%eax, (%edi)		C store carry low at dst[size]

	movl	%edx, %eax		C return carry high

	popl	%ebp
	popl	%ebx

	popl	%edi
	popl	%esi

	ret

EPILOGUE()