github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/aors_n.asm (about)

     1  dnl  Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
     2  
     3  dnl  Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C P5: 2.375 cycles/limb
    35  
    36  
        C Operation selection.  One source file provides both addition and
        C subtraction; the symbol OPERATION_add_n or OPERATION_sub_n must already be
        C defined when this point is reached (it is not defined in this file).
        C   M4_inst         the carry-propagating instruction, adcl or sbbl
        C   M4_function_n   entry point name without a carry-in argument
        C   M4_function_nc  entry point name with a carry-in argument
        C OPERATION_add_n is tested first, so it wins if both are defined.
        C If neither is defined, m4_error is invoked with the message below.
    37  ifdef(`OPERATION_add_n',`
    38  	define(M4_inst,        adcl)
    39  	define(M4_function_n,  mpn_add_n)
    40  	define(M4_function_nc, mpn_add_nc)
    41  
    42  ',`ifdef(`OPERATION_sub_n',`
    43  	define(M4_inst,        sbbl)
    44  	define(M4_function_n,  mpn_sub_n)
    45  	define(M4_function_nc, mpn_sub_nc)
    46  
    47  ',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
    48  ')')')
    49  
    50  MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
    51  
    52  
    53  C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    54  C                          mp_size_t size);
    55  C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    56  C                           mp_size_t size, mp_limb_t carry);
    57  
        C Names for the stack arguments, in prototype order at consecutive 4-byte
        C offsets starting at 4.  PARAM_CARRY is only read by M4_function_nc, and
        C only its bit 0 is used (it is shifted into the carry flag).
        C NOTE(review): defframe is defined in the included m4 files, not here.
        C Presumably it adds the current FRAME value (set by deflit and bumped by
        C FRAME_pushl in the code below) to these offsets so the names stay valid
        C after registers have been pushed -- confirm against the macro definition.
    58  defframe(PARAM_CARRY,20)
    59  defframe(PARAM_SIZE, 16)
    60  defframe(PARAM_SRC2, 12)
    61  defframe(PARAM_SRC1, 8)
    62  defframe(PARAM_DST,  4)
    63  
    64  	TEXT
    65  	ALIGN(8)
    66  PROLOGUE(M4_function_nc)
    67  
        C Carry-in entry point.  Computes dst[i] = src1[i] op src2[i] over size
        C limbs, where op is M4_inst (adcl or sbbl), starting with bit 0 of the
        C carry argument as the incoming carry or borrow.
        C
        C This routine only does the setup.  It repeats the prologue of
        C M4_function_n, loads the carry flag from PARAM_CARRY, and then jumps
        C into M4_function_n at L(oop) or L(end).  That shared code produces the
        C return value in eax and pops the four registers pushed here.
        C
        C Registers after setup:
        C   edi = dst   esi = src1   ebp = src2   ebx = src2 limb loaded ahead
        C   ecx = (size-1)/8  number of 8-limb passes through L(oop)
        C   edx = (size-1)%8  number of single limbs handled by L(oop2)
        C One final limb is always processed at L(end2), so size must be at
        C least 1 (src2[0] is also read unconditionally below).
    68  	pushl	%edi
    69  	pushl	%esi
    70  	pushl	%ebx
    71  	pushl	%ebp
        C Four registers pushed so far, 16 bytes.
    72  deflit(`FRAME',16)
    73  
    74  	movl	PARAM_DST,%edi
    75  	movl	PARAM_SRC1,%esi
    76  	movl	PARAM_SRC2,%ebp
    77  	movl	PARAM_SIZE,%ecx
    78  
    79  	movl	(%ebp),%ebx		C load src2[0] ahead of its use
    80  
    81  	decl	%ecx			C size-1, the last limb is handled separately
    82  	movl	%ecx,%edx
    83  	shrl	$3,%ecx			C ecx = number of 8-limb blocks
    84  	andl	$7,%edx			C edx = single limbs before the last one
    85  	testl	%ecx,%ecx		C zero carry flag
    86  	jz	L(endgo)		C no full block, go straight to the tail
    87  
        C edx is used as a data register inside L(oop), so the single-limb count
        C is kept on the stack and popped again after the loop in M4_function_n.
    88  	pushl	%edx
    89  FRAME_pushl()
    90  	movl	PARAM_CARRY,%eax
    91  	shrl	%eax			C shift bit 0 into carry
    92  	jmp	L(oop)			C enter the unrolled loop with CF = carry-in
    93  
    94  L(endgo):
        C This path is reached from the jz above, before the pushl of edx, so
        C FRAME is set back to the value for four pushed registers.
    95  deflit(`FRAME',16)
    96  	movl	PARAM_CARRY,%eax
    97  	shrl	%eax			C shift bit 0 into carry
    98  	jmp	L(end)			C enter the single-limb tail with CF = carry-in
    99  
   100  EPILOGUE()
   101  
   102  
   103  	ALIGN(8)
   104  PROLOGUE(M4_function_n)
   105  
        C Entry point without carry-in.  Computes dst[i] = src1[i] op src2[i] over
        C size limbs, where op is M4_inst (adcl or sbbl), with the carry or borrow
        C chained from limb to limb through the carry flag.  Returns in eax the
        C carry or borrow out of the top limb, 0 or 1.
        C
        C The code from L(oop) onwards is also entered by jumps from
        C M4_function_nc, with the carry flag already loaded.
        C
        C The limbs are split as size = 8*ecx + edx + 1:
        C   L(oop)   ecx passes of 8 limbs each
        C   L(oop2)  edx passes of 1 limb each
        C   L(end2)  the final limb
        C so size must be at least 1.
        C
        C Registers: edi = dst, esi = src1, ebp = src2, ebx = src2 limb loaded
        C one step ahead of its use, eax and edx = data (edx doubles as the
        C single-limb counter outside L(oop)).
        C
        C Between the point where the carry flag is set up and the final sbbl,
        C nothing may disturb CF.  Pointers are therefore advanced with leal and
        C counters with decl and incl, none of which change CF.
        C NOTE(review): the instruction order is presumably tuned for the P5
        C figure quoted at the top of the file -- measure before reordering.
   106  	pushl	%edi
   107  	pushl	%esi
   108  	pushl	%ebx
   109  	pushl	%ebp
        C Four registers pushed so far, 16 bytes.
   110  deflit(`FRAME',16)
   111  
   112  	movl	PARAM_DST,%edi
   113  	movl	PARAM_SRC1,%esi
   114  	movl	PARAM_SRC2,%ebp
   115  	movl	PARAM_SIZE,%ecx
   116  
   117  	movl	(%ebp),%ebx		C load src2[0] ahead of its use
   118  
   119  	decl	%ecx			C size-1, the last limb is handled separately
   120  	movl	%ecx,%edx
   121  	shrl	$3,%ecx			C ecx = number of 8-limb blocks
   122  	andl	$7,%edx			C edx = single limbs before the last one
   123  	testl	%ecx,%ecx		C zero carry flag
   124  	jz	L(end)			C no full block, CF is already clear
        C edx is used as a data register inside L(oop), so the single-limb count
        C is kept on the stack until the loop is finished.
   125  	pushl	%edx
   126  FRAME_pushl()
   127  
        C Unrolled loop, 8 limbs per pass, as four groups of two limbs.
        C The first load below is discarded (eax is overwritten at L(1)); it only
        C touches the last word of the 32-byte destination block before the stores.
        C edi is advanced first, so the stores use offsets -32 to -4.
   128  	ALIGN(8)
   129  L(oop):	movl	28(%edi),%eax		C fetch destination cache line
   130  	leal	32(%edi),%edi
   131  
        C Limbs 0 and 1 of the block.
   132  L(1):	movl	(%esi),%eax		C eax = src1 limb 0
   133  	movl	4(%esi),%edx		C edx = src1 limb 1
   134  	M4_inst	%ebx,%eax		C combine with src2 limb 0 held in ebx
   135  	movl	4(%ebp),%ebx		C ebx = src2 limb 1
   136  	M4_inst	%ebx,%edx
   137  	movl	8(%ebp),%ebx		C load src2 limb 2 ahead for L(2)
   138  	movl	%eax,-32(%edi)
   139  	movl	%edx,-28(%edi)
   140  
        C Limbs 2 and 3, same pattern.
   141  L(2):	movl	8(%esi),%eax
   142  	movl	12(%esi),%edx
   143  	M4_inst	%ebx,%eax
   144  	movl	12(%ebp),%ebx
   145  	M4_inst	%ebx,%edx
   146  	movl	16(%ebp),%ebx		C load src2 limb 4 ahead for L(3)
   147  	movl	%eax,-24(%edi)
   148  	movl	%edx,-20(%edi)
   149  
        C Limbs 4 and 5, same pattern.
   150  L(3):	movl	16(%esi),%eax
   151  	movl	20(%esi),%edx
   152  	M4_inst	%ebx,%eax
   153  	movl	20(%ebp),%ebx
   154  	M4_inst	%ebx,%edx
   155  	movl	24(%ebp),%ebx		C load src2 limb 6 ahead for L(4)
   156  	movl	%eax,-16(%edi)
   157  	movl	%edx,-12(%edi)
   158  
        C Limbs 6 and 7, same pattern.  The load from 32(%ebp) fetches the first
        C src2 limb after this block; it is within size because at least the
        C final limb is still to be processed.
   159  L(4):	movl	24(%esi),%eax
   160  	movl	28(%esi),%edx
   161  	M4_inst	%ebx,%eax
   162  	movl	28(%ebp),%ebx
   163  	M4_inst	%ebx,%edx
   164  	movl	32(%ebp),%ebx
   165  	movl	%eax,-8(%edi)
   166  	movl	%edx,-4(%edi)
   167  
   168  	leal	32(%esi),%esi		C advance src1, CF unchanged
   169  	leal	32(%ebp),%ebp		C advance src2, CF unchanged
   170  	decl	%ecx			C CF unchanged, ZF ends the loop
   171  	jnz	L(oop)
   172  
   173  	popl	%edx			C restore the single-limb count
   174  FRAME_popl()
   175  L(end):
        C Skip L(oop2) when edx is zero: decl turns 0 into -1 and sets the sign
        C flag; otherwise incl restores the count.
   176  	decl	%edx			C test %edx w/o clobbering carry
   177  	js	L(end2)
   178  	incl	%edx
        C Single-limb loop, edx passes.  ebx already holds the src2 limb for the
        C current position and is reloaded with the next one each pass.
   179  L(oop2):
   180  	leal	4(%edi),%edi
   181  	movl	(%esi),%eax
   182  	M4_inst	%ebx,%eax
   183  	movl	4(%ebp),%ebx		C next src2 limb, at least the final limb remains
   184  	movl	%eax,-4(%edi)		C edi was advanced above, hence offset -4
   185  	leal	4(%esi),%esi
   186  	leal	4(%ebp),%ebp
   187  	decl	%edx
   188  	jnz	L(oop2)
        C Final limb.  Its src2 value is already in ebx.
   189  L(end2):
   190  	movl	(%esi),%eax
   191  	M4_inst	%ebx,%eax
   192  	movl	%eax,(%edi)
   193  
        C Turn the carry flag into the return value: sbbl gives 0 or -1, negl
        C makes that 0 or 1.
   194  	sbbl	%eax,%eax
   195  	negl	%eax
   196  
        C Restore the registers pushed in the prologue, in reverse order.
   197  	popl	%ebp
   198  	popl	%ebx
   199  	popl	%esi
   200  	popl	%edi
   201  	ret
   202  
   203  EPILOGUE()