github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/mul_2.asm (about)

     1  dnl  Intel Pentium mpn_mul_2 -- mpn by 2-limb multiplication.
     2  
     3  dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C P5: 24.0 cycles/limb
    35  
    36  
    37  C mp_limb_t mpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size,
    38  C                      mp_srcptr mult);
    39  C
    40  C At 24 c/l this is only 2 cycles faster than a separate mul_1 and addmul_1,
    41  C but has the advantage of making just one pass over the operands.
    42  C
    43  C There aren't enough registers to use PARAM_MULT directly, so the multiplier
    44  C limbs are transferred to local variables on the stack.
    45  
    46  defframe(PARAM_MULT, 16)
    47  defframe(PARAM_SIZE, 12)
    48  defframe(PARAM_SRC,   8)
    49  defframe(PARAM_DST,   4)
    50  dnl  The multiplier limbs are kept in the src and dst parameter slots; the code
    51  dnl  loads both pointers into registers before overwriting those slots.
    52  define(VAR_MULT_LOW, `PARAM_SRC')
    53  define(VAR_MULT_HIGH,`PARAM_DST')
    54  
    55  	TEXT
    56  	ALIGN(8)
    57  PROLOGUE(mpn_mul_2)
    58  deflit(`FRAME',0)
    59  	C Stores the low size+1 limbs of {src,size} * {mult,2} at dst, returns the top limb in eax.
    60  	pushl	%esi		FRAME_pushl()
    61  	pushl	%edi		FRAME_pushl()
    62  	C src[0] is read unconditionally below, so size >= 1 is presumably required (TODO confirm).
    63  	movl	PARAM_SRC, %esi		C src pointer
    64  	movl	PARAM_DST, %edi		C dst pointer
    65  
    66  	movl	PARAM_MULT, %eax	C pointer to the two multiplier limbs
    67  	movl	PARAM_SIZE, %ecx	C size
    68  
    69  	movl	4(%eax), %edx		C mult high
    70  	movl	(%eax), %eax		C mult low
    71  	C src and dst are in esi/edi now, so their parameter slots can hold the limbs
    72  	movl	%eax, VAR_MULT_LOW
    73  	movl	%edx, VAR_MULT_HIGH
    74  
    75  	pushl	%ebx		FRAME_pushl()
    76  	pushl	%ebp		FRAME_pushl()
    77  	C eax still holds mult low here
    78  	mull	(%esi)			C src[0] * mult[0]
    79  
    80  	movl	%eax, %ebp		C in case src==dst
    81  	movl	(%esi), %eax		C src[0]
    82  
    83  	movl	%ebp, (%edi)		C dst[0]
    84  	movl	%edx, %ebx		C initial low carry
    85  
    86  	xorl	%ebp, %ebp		C initial high carry
    87  	leal	(%edi,%ecx,4), %edi	C dst end
    88  
    89  	mull	VAR_MULT_HIGH		C src[0] * mult[1]
    90  
    91  	subl	$2, %ecx		C size-2
    92  	js	L(done)			C size-2 < 0 (size of 1): no loop passes
    93  
    94  	leal	8(%esi,%ecx,4), %esi	C &src[size]
    95  	xorl	$-1, %ecx		C -(size-1)
    96  
    97  	C Each pass handles src[i], i = size+ecx, for i = 1 .. size-1: it completes
    98  	C dst[i] and leaves src[i]*mult[1] in edx:eax for the next pass or L(done).
    99  L(top):
   100  	C eax	low prod
   101  	C ebx	low carry
   102  	C ecx	counter, negative
   103  	C edx	high prod
   104  	C esi	src end
   105  	C edi	dst end
   106  	C ebp	high carry (0 or -1)
   107  
   108  	andl	$1, %ebp		C 1 or 0
   109  	addl	%eax, %ebx		C low carry + low limb of pending mult[1] product
   110  
   111  	adcl	%edx, %ebp		C high carry + high limb of that product + CF
   112  	ASSERT(nc)			C presumably a debug check that CF is clear (macro defined elsewhere)
   113  	movl	(%esi,%ecx,4), %eax	C src[i]
   114  
   115  	mull	VAR_MULT_LOW		C src[i] * mult[0]
   116  
   117  	addl	%eax, %ebx		C low carry
   118  	movl	(%esi,%ecx,4), %eax	C src[i] again, read before dst[i] is written
   119  
   120  	adcl	%ebp, %edx		C high carry
   121  	movl	%ebx, (%edi,%ecx,4)	C dst[i]; movl leaves CF for the sbbl
   122  
   123  	sbbl	%ebp, %ebp		C new high carry, -1 or 0
   124  	movl	%edx, %ebx		C new low carry
   125  
   126  	mull	VAR_MULT_HIGH		C src[i] * mult[1]
   127  
   128  	incl	%ecx
   129  	jnz	L(top)			C until ecx reaches 0, i.e. src[size-1] done
   130  
   131  	C Fold the last mult[1] product (edx:eax) into the pending carries.
   132  L(done):
   133  	andl	$1, %ebp		C 1 or 0
   134  	addl	%ebx, %eax		C result limb at index size
   135  
   136  	adcl	%ebp, %edx		C top limb of the result
   137  	ASSERT(nc)
   138  	movl	%eax, (%edi)		C store carry low
   139  
   140  	movl	%edx, %eax		C return carry high
   141  
   142  	popl	%ebp
   143  	popl	%ebx
   144  
   145  	popl	%edi
   146  	popl	%esi
   147  
   148  	ret
   149  
   150  EPILOGUE()