github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/mul_basecase.asm (about)

     1  dnl  Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
     2  
     3  dnl  Copyright 1996, 1998-2000, 2002 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C P5: 14.2 cycles/crossproduct (approx)
    35  C Schoolbook product: stores the xsize+ysize limbs of {xp,xsize} * {yp,ysize} at wp.
    36  
    37  C void mpn_mul_basecase (mp_ptr wp,
    38  C                        mp_srcptr xp, mp_size_t xsize,
    39  C                        mp_srcptr yp, mp_size_t ysize);
    40  C The xsize = 1 shortcut below assumes ysize = 1 too; presumably callers guarantee xsize >= ysize >= 1 (TODO confirm).
    41  defframe(PARAM_YSIZE, 20)
    42  defframe(PARAM_YP,    16)
    43  defframe(PARAM_XSIZE, 12)
    44  defframe(PARAM_XP,    8)
    45  defframe(PARAM_WP,    4)
    46  
    47  defframe(VAR_COUNTER, -4)
    48  C VAR_COUNTER counts the y limbs still to be processed by the outer loop (presumably it lives in the slot made by the dummy push below).
    49  	TEXT
    50  	ALIGN(8)
    51  PROLOGUE(mpn_mul_basecase)
    52  C Register use: esi = xp, edi = wp, ebp = yp; ebx carries the high limb from one inner-loop step to the next.
    53  	pushl	%eax			C dummy push for allocating stack slot
    54  	pushl	%esi			C esi, ebp, edi are restored on both exit paths
    55  	pushl	%ebp
    56  	pushl	%edi
    57  deflit(`FRAME',16)
    58  
    59  	movl	PARAM_XP,%esi
    60  	movl	PARAM_WP,%edi
    61  	movl	PARAM_YP,%ebp
    62  
    63  	movl	(%esi),%eax		C load xp[0]
    64  	mull	(%ebp)			C multiply by yp[0]
    65  	movl	%eax,(%edi)		C store to wp[0]
    66  	movl	PARAM_XSIZE,%ecx	C xsize
    67  	decl	%ecx			C If xsize = 1, ysize = 1 too
    68  	jz	L(done)
    69  C xsize > 1: finish the first row, wp[1..xsize] = remaining limbs of {xp,xsize} * yp[0].
    70  	movl	PARAM_XSIZE,%eax
    71  	pushl	%ebx
    72  FRAME_pushl()
    73  	movl	%edx,%ebx		C high limb of xp[0]*yp[0] is the first carry-in
    74  	leal	(%esi,%eax,4),%esi	C make xp point at end
    75  	leal	(%edi,%eax,4),%edi	C offset wp by xsize
    76  	negl	%ecx			C negate j size/index for inner loop
    77  	xorl	%eax,%eax		C clear carry
    78  C movl and incl leave CF untouched, so the carry out of each addl reaches the adcl at the top of the next pass.
    79  	ALIGN(8)
    80  L(oop1):	adcl	$0,%ebx		C fold in carry from the previous addl
    81  	movl	(%esi,%ecx,4),%eax	C load next limb at xp[j]
    82  	mull	(%ebp)			C edx:eax = xp[j] * yp[0]
    83  	addl	%ebx,%eax		C add carry-in limb, CF = carry out
    84  	movl	%eax,(%edi,%ecx,4)	C store the matching limb of wp
    85  	incl	%ecx			C sets ZF for jnz, preserves CF
    86  	movl	%edx,%ebx		C high limb becomes the next carry-in
    87  	jnz	L(oop1)
    88  
    89  	adcl	$0,%ebx			C fold in the final carry
    90  	movl	PARAM_YSIZE,%eax
    91  	movl	%ebx,(%edi)		C most significant limb of product
    92  	addl	$4,%edi			C increment wp
    93  	decl	%eax
    94  	jz	L(skip)			C ysize = 1: product is complete
    95  	movl	%eax,VAR_COUNTER	C outer loop count = ysize-1 remaining y limbs
    96  C Each outer pass adds {xp,xsize} * (next y limb) into wp and stores one new top limb.
    97  L(outer):
    98  	addl	$4,%ebp			C make ebp point to next y limb
    99  	movl	PARAM_XSIZE,%ecx
   100  	negl	%ecx			C ecx = -xsize, counts up to zero
   101  	xorl	%ebx,%ebx		C zero the carry-in limb and clear CF
   102  
   103  	C code at 0x61 here, close enough to aligned
   104  L(oop2):
   105  	adcl	$0,%ebx			C fold in carry from the previous accumulate
   106  	movl	(%esi,%ecx,4),%eax	C load x limb
   107  	mull	(%ebp)			C edx:eax = x limb * current y limb
   108  	addl	%ebx,%eax		C low limb += carry-in limb
   109  	movl	(%edi,%ecx,4),%ebx	C load existing w limb
   110  	adcl	$0,%edx			C propagate that carry into the high limb
   111  	addl	%eax,%ebx		C accumulate into w limb, CF = carry out
   112  	movl	%ebx,(%edi,%ecx,4)	C store updated w limb
   113  	incl	%ecx			C sets ZF for jnz, preserves CF
   114  	movl	%edx,%ebx		C high limb becomes the next carry-in
   115  	jnz	L(oop2)
   116  
   117  	adcl	$0,%ebx			C fold in the final carry
   118  
   119  	movl	%ebx,(%edi)		C new most significant limb
   120  	addl	$4,%edi			C advance wp for the next row
   121  	movl	VAR_COUNTER,%eax
   122  	decl	%eax
   123  	movl	%eax,VAR_COUNTER
   124  	jnz	L(outer)		C movl keeps ZF from the decl
   125  C Exit for xsize > 1: ebx was pushed on this path, so it is popped first.
   126  L(skip):
   127  	popl	%ebx
   128  	popl	%edi
   129  	popl	%ebp
   130  	popl	%esi
   131  	addl	$4,%esp			C drop the dummy slot
   132  	ret
   133  C Exit for xsize = 1: ebx was neither pushed nor modified on this path.
   134  L(done):
   135  	movl	%edx,4(%edi)	C store to wp[1]
   136  	popl	%edi
   137  	popl	%ebp
   138  	popl	%esi
   139  	popl	%eax		C dummy pop for deallocating stack slot
   140  	ret
   141  
   142  EPILOGUE()
   143