dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/mul_basecase.asm

dnl  Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.

dnl  Copyright 1996, 1998-2000, 2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library. If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C P5: 14.2 cycles/crossproduct (approx)


C void mpn_mul_basecase (mp_ptr wp,
C                        mp_srcptr xp, mp_size_t xsize,
C                        mp_srcptr yp, mp_size_t ysize);
C
C Schoolbook multiplication: every limb of x is multiplied by every limb
C of y and the partial products are accumulated into wp.  The last limb
C written is wp[xsize+ysize-1].
C
C Syntax: AT&T, 32-bit x86.  All five arguments are read from the stack
C through the frame offsets declared below.
C
C Structure:
C   1. x[0]*y[0] is formed up front; if xsize is 1 the two result limbs
C      are stored and the routine returns.  That shortcut takes ysize to
C      be 1 as well, so callers are presumably required to pass
C      xsize >= ysize >= 1 (TODO confirm against the mpn interface docs).
C   2. First row: wp[0..xsize] = x * y[0], plain stores.
C   3. For each further y limb: multiply x by it and add the row into wp
C      one limb higher than the previous row.
C
C Register roles once set up:
C   esi      xp + 4*xsize, one limb past the end of x
C   edi      wp biased by xsize limbs, moved up one limb after each row
C   ebp      address of the y limb for the current row
C   ecx      negative limb index, counted up towards zero
C   ebx      carry limb handed from one inner iteration to the next
C   edx:eax  64-bit product delivered by mull
C
C Saved and restored: esi, ebp, edi, ebx.  Overwritten: eax, ecx, edx.
C
C The inner loops depend on incl leaving the carry flag untouched, so the
C carry produced by addl is still live when the next adcl consumes it.
C Do not replace incl by addl or reorder around it.

defframe(PARAM_YSIZE, 20)
defframe(PARAM_YP, 16)
defframe(PARAM_XSIZE, 12)
defframe(PARAM_XP, 8)
defframe(PARAM_WP, 4)

defframe(VAR_COUNTER, -4)

	TEXT
	ALIGN(8)
PROLOGUE(mpn_mul_basecase)

	pushl	%eax			C value irrelevant, reserves the counter slot
	pushl	%esi
	pushl	%ebp
	pushl	%edi
deflit(`FRAME',16)

	movl	PARAM_XP,%esi
	movl	PARAM_WP,%edi
	movl	PARAM_YP,%ebp

	movl	(%esi),%eax		C eax = xp[0]
	mull	(%ebp)			C edx:eax = xp[0] * yp[0]
	movl	%eax,(%edi)		C wp[0] = low limb
	movl	PARAM_XSIZE,%ecx
	decl	%ecx			C ecx = xsize-1, zero means a 1x1 product
	jz	L(one_limb)

	movl	PARAM_XSIZE,%eax
	pushl	%ebx
FRAME_pushl()
	movl	%edx,%ebx		C high limb of x[0]*y[0] is the first carry limb
	leal	(%esi,%eax,4),%esi	C esi = one past the last x limb
	leal	(%edi,%eax,4),%edi	C edi = wp + xsize limbs
	negl	%ecx			C ecx = -(xsize-1), so the first access is limb 1
	xorl	%eax,%eax		C carry flag must be clear entering the loop

	ALIGN(8)
L(mul1_loop):
	adcl	$0,%ebx			C fold the carry of the previous addl into the carry limb
	movl	(%esi,%ecx,4),%eax	C eax = next x limb
	mull	(%ebp)			C times y[0]
	addl	%ebx,%eax		C low limb plus incoming carry limb
	movl	%eax,(%edi,%ecx,4)	C store result limb
	incl	%ecx			C advance index, carry flag stays as addl left it
	movl	%edx,%ebx		C high limb becomes the next carry limb
	jnz	L(mul1_loop)

	adcl	$0,%ebx			C pick up the carry of the final addl
	movl	PARAM_YSIZE,%eax
	movl	%ebx,(%edi)		C top limb of the first row, wp[xsize]
	addl	$4,%edi			C next row lands one limb higher
	decl	%eax			C eax = ysize-1 rows still to do
	jz	L(finish)
	movl	%eax,VAR_COUNTER	C remaining row count, ysize-1

L(next_ylimb):
	addl	$4,%ebp			C step to the next y limb
	movl	PARAM_XSIZE,%ecx
	negl	%ecx			C ecx = -xsize, row starts at x[0]
	xorl	%ebx,%ebx		C carry limb = 0, also clears the carry flag

	C No ALIGN here, the original layout note reads:
	C code at 0x61 here, close enough to aligned
L(addmul1_loop):
	adcl	$0,%ebx			C fold the carry of the previous accumulate
	movl	(%esi,%ecx,4),%eax	C eax = next x limb
	mull	(%ebp)			C times the current y limb
	addl	%ebx,%eax		C low limb plus incoming carry limb
	movl	(%edi,%ecx,4),%ebx	C ebx = limb already in wp
	adcl	$0,%edx			C carry of that add goes into the high limb
	addl	%eax,%ebx		C accumulate into the wp limb
	movl	%ebx,(%edi,%ecx,4)	C write it back
	incl	%ecx			C advance index, carry flag preserved
	movl	%edx,%ebx		C high limb becomes the next carry limb
	jnz	L(addmul1_loop)

	adcl	$0,%ebx			C pick up the carry of the final accumulate

	movl	%ebx,(%edi)		C new top limb of this row
	addl	$4,%edi
	movl	VAR_COUNTER,%eax
	decl	%eax
	movl	%eax,VAR_COUNTER
	jnz	L(next_ylimb)		C flags still those of decl, movl leaves them alone

L(finish):
	popl	%ebx
	popl	%edi
	popl	%ebp
	popl	%esi
	addl	$4,%esp			C release the counter slot
	ret

L(one_limb):
	C Taken before ebx was pushed, so only three saved registers
	C and the counter slot are on the stack here.
	movl	%edx,4(%edi)		C wp[1] = high limb of x[0]*y[0]
	popl	%edi
	popl	%ebp
	popl	%esi
	popl	%eax			C discard the counter slot
	ret

EPILOGUE()