github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/aors_n.asm (about)

     1  dnl  x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
     2  
     3  dnl  Copyright 1992, 1994-1996, 1999-2002 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C     cycles/limb
    35  C P5	3.375
    36  C P6	3.125
    37  C K6	3.5
    38  C K7	2.25
    39  C P4	8.75
    40  
    41  C Select the per-limb instruction and the two entry point names according to which OPERATION_ symbol is defined.
    42  ifdef(`OPERATION_add_n',`
    43  	define(M4_inst,        adcl)		C add with carry, so CF chains from limb to limb
    44  	define(M4_function_n,  mpn_add_n)	C entry point without a carry-in argument
    45  	define(M4_function_nc, mpn_add_nc)	C entry point with a carry-in argument
    46  
    47  ',`ifdef(`OPERATION_sub_n',`
    48  	define(M4_inst,        sbbl)		C subtract with borrow, so CF chains from limb to limb
    49  	define(M4_function_n,  mpn_sub_n)	C entry point without a borrow-in argument
    50  	define(M4_function_nc, mpn_sub_nc)	C entry point with a borrow-in argument
    51  
    52  ',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
    53  ')')')
    54  C MULFUNC_PROLOGUE names all four entry points this one source can produce (the macro itself is defined outside this file).
    55  MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
    56  
    57  
    58  C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    59  C                          mp_size_t size);
    60  C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    61  C	                    mp_size_t size, mp_limb_t carry);
    62  C Argument slots in prototype order, 4 bytes apart; presumably FRAME-adjusted stack operands (defframe is defined outside this file).
    63  defframe(PARAM_CARRY,20)	C fifth argument, read only on the M4_function_nc paths
    64  defframe(PARAM_SIZE, 16)	C number of limbs to process
    65  defframe(PARAM_SRC2, 12)	C second source operand pointer
    66  defframe(PARAM_SRC1, 8)		C first source operand pointer
    67  defframe(PARAM_DST,  4)		C destination pointer
    68  
    69  	TEXT
    70  	ALIGN(8)
    71  	C M4_function_nc: same setup as M4_function_n, but CF is seeded from bit 0 of the carry argument before the shared loop runs.
    72  PROLOGUE(M4_function_nc)
    73  deflit(`FRAME',0)
    74  	C This function has no ret of its own; both exits (jz and jmp) land in code placed after the M4_function_n prologue.
    75  	pushl	%edi		FRAME_pushl()	C saved here, popped after the loop at the shared exit
    76  	pushl	%esi		FRAME_pushl()
    77  
    78  	movl	PARAM_DST,%edi		C edi = dst
    79  	movl	PARAM_SRC1,%esi		C esi = src1
    80  	movl	PARAM_SRC2,%edx		C edx = src2
    81  	movl	PARAM_SIZE,%ecx		C ecx = size, in 4-byte limbs
    82  
    83  	movl	%ecx,%eax
    84  	shrl	$3,%ecx			C ecx = size/8, number of full passes of the 8-slot loop
    85  	negl	%eax
    86  	andl	$7,%eax			C eax = (-size) mod 8, slots to skip on the first pass
    87  	jz	L(oopgo)		C size is a multiple of 8: no partial pass, set carry at L(oopgo)
    88  	incl	%ecx			C adjust loop count
    89  	shll	$2,%eax			C adjustment for pointers...
    90  	subl	%eax,%edi		C ... since they are offset ...
    91  	subl	%eax,%esi		C ... by a constant when we ...
    92  	subl	%eax,%edx		C ... enter the loop
    93  	shrl	$2,%eax			C restore previous value
    94  	C Pointers are now 4*eax bytes low, so the displacement of the first slot executed addresses limb 0.
    95  ifdef(`PIC',`
    96  	C Calculate start address in loop for PIC.  Due to limitations in
    97  	C old gas, LF(M4_function_n,oop)-L(0a)-3 cannot be put into the leal
    98  	call	L(0a)			C pushes the runtime address of L(0a)
    99  L(0a):	leal	(%eax,%eax,8),%eax	C eax = 9 * slot index
   100  	addl	(%esp),%eax		C + runtime address of L(0a)
   101  	addl	$L(oop)-L(0a)-3,%eax	C + distance from L(0a) to L(oop), minus 3
   102  	addl	$4,%esp			C discard the address pushed by call
   103  ',`
   104  	C Calculate start address in loop for non-PIC.
   105  	leal	L(oop)-3(%eax,%eax,8),%eax	C eax = L(oop) + 9*index - 3
   106  ')
   107  	C The 9*index-3 formula relies on the first loop slot assembling to 6 bytes and each later slot to 9 -- confirm if the loop is edited.
   108  	C These lines initialize carry from the 5th parameter.  Should be
   109  	C possible to simplify.
   110  	pushl	%ebp		FRAME_pushl()	C borrow ebp as scratch
   111  	movl	PARAM_CARRY,%ebp
   112  	shrl	%ebp			C shift bit 0 into carry
   113  	popl	%ebp		FRAME_popl()	C restore ebp; popl leaves CF untouched
   114  	C Carry is set last because the shifts and adds above rewrite the flags.
   115  	jmp	*%eax			C jump into loop
   116  
   117  EPILOGUE()
   118  	C M4_function_n: limb-wise dst = src1 M4_inst src2 over size limbs, carry chained through CF; returns the final carry as 0 or 1 in eax.
   119  	C NOTE(review): size = 0 would enter L(oop) with ecx = 0 and decl would wrap; presumably callers guarantee size >= 1 -- confirm.
   120  	ALIGN(16)
   121  PROLOGUE(M4_function_n)
   122  deflit(`FRAME',0)
   123  	C Setup mirrors M4_function_nc, except that no carry-in is loaded.
   124  	pushl	%edi		FRAME_pushl()	C saved here, popped at the exit below
   125  	pushl	%esi		FRAME_pushl()
   126  
   127  	movl	PARAM_DST,%edi		C edi = dst
   128  	movl	PARAM_SRC1,%esi		C esi = src1
   129  	movl	PARAM_SRC2,%edx		C edx = src2
   130  	movl	PARAM_SIZE,%ecx		C ecx = size, in 4-byte limbs
   131  
   132  	movl	%ecx,%eax
   133  	shrl	$3,%ecx			C ecx = size/8, number of full passes of the 8-slot loop
   134  	negl	%eax
   135  	andl	$7,%eax			C eax = (-size) mod 8, slots to skip; andl also clears CF
   136  	jz	L(oop)			C size is a multiple of 8: start at the loop top with CF clear
   137  	incl	%ecx			C adjust loop count
   138  	shll	$2,%eax			C adjustment for pointers...
   139  	subl	%eax,%edi		C ... since they are offset ...
   140  	subl	%eax,%esi		C ... by a constant when we ...
   141  	subl	%eax,%edx		C ... enter the loop
   142  	shrl	$2,%eax			C restore previous value; the bits shifted out are zero, so CF ends clear
   143  	C NOTE(review): carry-in 0 needs CF clear at the jmp; the PIC addl sequence rewrites CF, presumably to 0 as the sums should not wrap -- confirm.
   144  ifdef(`PIC',`
   145  	C Calculate start address in loop for PIC.  Due to limitations in
   146  	C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
   147  	call	L(0b)			C pushes the runtime address of L(0b)
   148  L(0b):	leal	(%eax,%eax,8),%eax	C eax = 9 * slot index
   149  	addl	(%esp),%eax		C + runtime address of L(0b)
   150  	addl	$L(oop)-L(0b)-3,%eax	C + distance from L(0b) to L(oop), minus 3
   151  	addl	$4,%esp			C discard the address pushed by call
   152  ',`
   153  	C Calculate start address in loop for non-PIC.
   154  	leal	L(oop)-3(%eax,%eax,8),%eax	C eax = L(oop) + 9*index - 3
   155  ')
   156  	jmp	*%eax			C jump into loop
   157  	C L(oopgo) is referenced only by the jz in M4_function_nc, taken when size is a multiple of 8.
   158  L(oopgo):
   159  	pushl	%ebp		FRAME_pushl()	C borrow ebp as scratch
   160  	movl	PARAM_CARRY,%ebp
   161  	shrl	%ebp			C shift bit 0 into carry
   162  	popl	%ebp		FRAME_popl()	C restore ebp; popl leaves CF untouched
   163  	C Execution falls through the ALIGN padding into L(oop); this assumes the padding does not alter flags.
   164  	ALIGN(16)
   165  L(oop):	movl	(%esi),%eax		C slot 0: eax = src1 limb
   166  	M4_inst	(%edx),%eax		C eax = eax op src2 limb, consuming and producing CF
   167  	movl	%eax,(%edi)		C store to dst; movl leaves CF intact
   168  	movl	4(%esi),%eax		C slot 1
   169  	M4_inst	4(%edx),%eax
   170  	movl	%eax,4(%edi)
   171  	movl	8(%esi),%eax		C slot 2
   172  	M4_inst	8(%edx),%eax
   173  	movl	%eax,8(%edi)
   174  	movl	12(%esi),%eax		C slot 3
   175  	M4_inst	12(%edx),%eax
   176  	movl	%eax,12(%edi)
   177  	movl	16(%esi),%eax		C slot 4
   178  	M4_inst	16(%edx),%eax
   179  	movl	%eax,16(%edi)
   180  	movl	20(%esi),%eax		C slot 5
   181  	M4_inst	20(%edx),%eax
   182  	movl	%eax,20(%edi)
   183  	movl	24(%esi),%eax		C slot 6
   184  	M4_inst	24(%edx),%eax
   185  	movl	%eax,24(%edi)
   186  	movl	28(%esi),%eax		C slot 7
   187  	M4_inst	28(%edx),%eax
   188  	movl	%eax,28(%edi)
   189  	leal	32(%edi),%edi		C advance by 8 limbs; leal does not touch flags
   190  	leal	32(%esi),%esi
   191  	leal	32(%edx),%edx
   192  	decl	%ecx			C decl sets ZF but leaves CF alone
   193  	jnz	L(oop)
   194  
   195  	sbbl	%eax,%eax		C eax = 0 - CF, i.e. 0 or -1
   196  	negl	%eax			C eax = final carry or borrow, 0 or 1
   197  
   198  	popl	%esi			C restore the registers pushed in either prologue
   199  	popl	%edi
   200  	ret
   201  
   202  EPILOGUE()