github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/atom/aorslshC_n.asm (about)

     1  dnl  Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C)
     2  
     3  dnl  Contributed to the GNU project by Marco Bodrato.
     4  
     5  dnl  Copyright 2011 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C mp_limb_t mpn_addlshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
    36  C mp_limb_t mpn_addlshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
    37  C				mp_limb_t carry);
    38  C mp_limb_t mpn_sublshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
    39  C mp_limb_t mpn_sublshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
    40  C				mp_signed_limb_t borrow);
    41  
    42  defframe(PARAM_CORB,	16)	C carry/borrow in; read only by M4_ip_function_c
    43  defframe(PARAM_SIZE,	12)	C size
    44  defframe(PARAM_SRC,	 8)	C src (for the 4-argument entry points this slot is src1)
    45  defframe(PARAM_DST,	 4)	C dst
        C The offsets rise in the order the arguments appear in the _ip1
        C prototypes above.  defframe itself is not defined in this file.
    46  
    47  C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    48  C                          mp_size_t size);
    49  C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    50  C                           mp_size_t size, mp_limb_t carry);
    51  C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    52  C                          mp_size_t size);
    53  C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    54  C                           mp_size_t size, mp_limb_t borrow);
    55  
    56  C if src1 == dst, _ip1 is used
    57  
    58  C					cycles/limb
    59  C				dst!=src1,src2	dst==src1
    60  C P5
    61  C P6 model 0-8,10-12
    62  C P6 model 9  (Banias)
    63  C P6 model 13 (Dothan)
    64  C P4 model 0  (Willamette)
    65  C P4 model 1  (?)
    66  C P4 model 2  (Northwood)
    67  C P4 model 3  (Prescott)
    68  C P4 model 4  (Nocona)
    69  C Intel Atom			 7		 6
    70  C AMD K6
    71  C AMD K7
    72  C AMD K8
    73  C AMD K10
    74  
    75  defframe(GPARAM_CORB,	20)	C carry/borrow in; read only by M4_function_c
    76  defframe(GPARAM_SIZE,	16)	C size; L(general) rewrites this slot as its loop counter
    77  defframe(GPARAM_SRC2,	12)	C src2 (dst and src1 are reached through PARAM_DST/PARAM_SRC)
    78  
    79  dnl  re-use parameter space
        C Each SAVE_ name is an alias for an incoming argument slot.  The code
        C below stores a register there only after the argument occupying that
        C slot has been loaded (for the 4-argument entry points the offset-12
        C slot behind SAVE_EBP holds src2, which is also read first).
    80  define(SAVE_EBP,`PARAM_SIZE')
    81  define(SAVE_EBX,`PARAM_SRC')
    82  define(SAVE_UP,`PARAM_DST')
    83  
        C M is 1 shifted left by LSH and is used as the scale factor in the lea
        C instructions.  LSH (and RSH, M4_inst, M4_opp, M4_*function*) are not
        C defined in this file; presumably a per-shift wrapper sets them.
    84  define(M, eval(m4_lshift(1,LSH)))
    85  define(`rp',  `%edi')
    86  define(`up',  `%esi')
    87  
    88  ASM_START()
    89  	TEXT
    90  	ALIGN(8)
    91  
    92  PROLOGUE(M4_ip_function_c)
        C In-place entry point that takes a carry/borrow-in word (PARAM_CORB).
        C It only preloads %ecx and %edx from that word and then jumps to
        C L(start_nc) inside M4_ip_function, which does all the work.
        C   %edx = bit LSH of the word (0 or 1)
        C   %ecx = the word after M4_opp has combined it with that bit
        C M4_opp is defined outside this file, so its exact effect is not
        C visible here (presumably the opposite add/sub of M4_inst).
        C
        C NOTE(review): %edx is left as 0 or 1, but the shared body reloads CF
        C with "add %edx,%edx" (which needs bit 31 set) on every limb except
        C the last, where "shr %edx" (bit 0) is used.  With %edx = 1 and
        C size > 1 the add yields no carry, so the extracted bit looks like it
        C is dropped.  Confirm whether %edx should be 0/-1 here.
    93  deflit(`FRAME',0)
    94  	movl	PARAM_CORB, %ecx	C ecx = carry/borrow-in word
    95  	movl	%ecx, %edx
    96  	shr	$LSH, %edx
    97  	andl	$1, %edx		C edx = bit LSH of that word
    98  	M4_opp	%edx, %ecx		C fold that bit back into ecx
    99  	jmp	L(start_nc)		C continue in M4_ip_function
   100  EPILOGUE()
   101  
   102  PROLOGUE(M4_ip_function)
        C In-place entry point without carry-in.  Per the file header this
        C computes dst[] = dst[] +- (src[] << LSH); M4_inst (defined outside
        C this file, presumably adc/sbb) supplies the +-.  The loop handles two
        C limbs per iteration.
        C
        C Register use in the shared body (L(start_nc) onwards):
        C   rp (%edi)   destination pointer
        C   up (%esi)   pointer to the limbs that get scaled by M
        C   %ebx        iteration count, (size+1)/2
        C   %eax, %ecx  take turns holding the limb just loaded and the bits
        C               left over from the previous limb (limb >> RSH)
        C   %edx        carry/borrow parked between limbs: written by
        C               sbb %edx,%edx, moved back into CF by add %edx,%edx
        C               (inside the loop) or shr %edx (last limb)
        C   %ebp        scratch for the value handed to M4_inst
        C %edi is pushed; %esi, %ebx and %ebp are kept in argument slots that
        C have already been read (SAVE_UP, SAVE_EBX, SAVE_EBP).
        C Returns in %eax: (last limb >> RSH) plus the final carry/borrow.
   103  deflit(`FRAME',0)
   104  
   105  	xor	%ecx, %ecx		C no carry-in word
   106  	xor	%edx, %edx		C no parked carry
   107  L(start_nc):				C M4_ip_function_c jumps here with ecx/edx preloaded
   108  	push	rp			FRAME_pushl()
   109  	mov	PARAM_DST, rp		C rp = dst
   110  	mov	up, SAVE_UP		C dst slot is free now; keep the incoming esi there
   111  	mov	PARAM_SRC, up		C up = src
   112  	mov	%ebx, SAVE_EBX		C src slot is free now
   113  	mov	PARAM_SIZE, %ebx	C size
   114  L(inplace):				C M4_function jumps here when dst == src1
   115  	incl	%ebx			C size + 1
   116  	shr	%ebx			C (size+1)\2
   117  	mov	%ebp, SAVE_EBP		C mov leaves CF from the shr untouched
   118  	jnc	L(entry)		C size odd
   119  
   120  	add	%edx, %edx		C size even
   121  	mov	%ecx, %ebp		C ebp = carry-in word
   122  	mov	(up), %ecx		C ecx = first source limb
   123  	lea	-4(rp), rp		C bias rp so that 4(rp) below is dst[0]
   124  	lea	(%ebp,%ecx,M), %eax	C eax = limb*M + carry-in word
   125  	lea	4(up), up
   126  	jmp	L(enteven)		C start with the second half of the loop body
   127  
   128  	ALIGN(16)
   129  L(oop):
   130  	lea	(%ecx,%eax,M), %ebp	C ebp = limb*M + bits left over in ecx
   131  	shr	$RSH, %eax		C eax = bits to carry into the next limb
   132  	mov	4(up), %ecx		C ecx = next source limb
   133  	add	%edx, %edx		C CF = bit 31 of edx (edx is 0 or -1 after the sbb below)
   134  	lea	8(up), up		C lea advances the pointer without touching CF
   135  	M4_inst	%ebp, (rp)
   136  	lea	(%eax,%ecx,M), %eax	C eax = limb*M + bits left over in eax
   137  
   138  L(enteven):
   139  	M4_inst	%eax, 4(rp)
   140  	lea	8(rp), rp
   141  
   142  	sbb	%edx, %edx		C park CF: edx = 0 or -1
   143  	shr	$RSH, %ecx		C ecx = bits to carry into the next limb
   144  
   145  L(entry):
   146  	mov	(up), %eax		C eax = next (possibly last) source limb
   147  	decl	%ebx
   148  	jnz	L(oop)
   149  
        C Last limb.
   150  	lea	(%ecx,%eax,M), %ebp
   151  	shr	$RSH, %eax		C eax = bits shifted out at the top
   152  	shr	%edx			C CF = bit 0 of edx, the parked carry
   153  	M4_inst	%ebp, (rp)
   154  	mov	SAVE_UP, up		C mov does not disturb the flags read by adc
   155  	adc	$0, %eax		C return value += final carry/borrow
   156  	mov	SAVE_EBP, %ebp
   157  	mov	SAVE_EBX, %ebx
   158  	pop	rp			FRAME_popl()
   159  	ret
   160  EPILOGUE()
   161  
   162  PROLOGUE(M4_function_c)
        C General (dst, src1, src2, size, carry) entry point.  It preloads
        C %ecx and %edx from the carry/borrow-in word GPARAM_CORB and then
        C jumps to L(generic_nc) inside M4_function.
        C   %edx = bit LSH of the word (0 or 1)
        C   %ecx = the word after M4_opp has combined it with that bit
        C M4_opp is defined outside this file, so its exact effect is not
        C visible here (presumably the opposite add/sub of M4_inst).
        C
        C NOTE(review): %edx is left as 0 or 1, but both loop bodies reload CF
        C with "add %edx,%edx" (which needs bit 31 set) on every limb except
        C the last, where "shr %edx" (bit 0) is used.  With %edx = 1 and
        C size > 1 the extracted bit looks like it is dropped.  Confirm
        C whether %edx should be 0/-1 here.
   163  deflit(`FRAME',0)
   164  	movl	GPARAM_CORB, %ecx	C ecx = carry/borrow-in word
   165  	movl	%ecx, %edx
   166  	shr	$LSH, %edx
   167  	andl	$1, %edx		C edx = bit LSH of that word
   168  	M4_opp	%edx, %ecx		C fold that bit back into ecx
   169  	jmp	L(generic_nc)		C continue in M4_function
   170  EPILOGUE()
   171  
   172  PROLOGUE(M4_function)
        C General entry point without carry-in.  Per the file header this is
        C dst[] = src1[] +- (src2[] << LSH); M4_inst (defined outside this
        C file, presumably adc/sbb) supplies the +-.
        C
        C If dst == src1 the work is handed to L(inplace) in M4_ip_function
        C with up = src2.  Otherwise L(general) runs its own two-limbs-per-
        C iteration loop:
        C   rp (%edi)   destination pointer
        C   up (%esi)   src2, the limbs that get scaled by M
        C   %ebx        src1, the limbs used unscaled
        C   %eax, %ecx  take turns holding the src2 limb just loaded and the
        C               bits left over from the previous one (limb >> RSH)
        C   %edx        doubles as the parked carry (sbb %edx,%edx, reloaded
        C               by add %edx,%edx or shr %edx) and as the temporary
        C               that holds the src1 limb while M4_inst is applied
        C   %ebp        scratch for the value handed to M4_inst
        C   GPARAM_SIZE iteration count (size+1)/2, kept in the stack slot
        C Returns in %eax: (last src2 limb >> RSH) plus the final carry/borrow.
   173  deflit(`FRAME',0)
   174  
   175  	xor	%ecx, %ecx		C no carry-in word
   176  	xor	%edx, %edx		C no parked carry
   177  L(generic_nc):				C M4_function_c jumps here with ecx/edx preloaded
   178  	push	rp			FRAME_pushl()
   179  	mov	PARAM_DST, rp		C rp = dst
   180  	mov	up, SAVE_UP		C dst slot is free now; keep the incoming esi there
   181  	mov	PARAM_SRC, up		C up = src1 for the moment
   182  	cmp	rp, up			C dst == src1 ?
   183  	mov	%ebx, SAVE_EBX		C mov keeps the flags from the cmp
   184  	jne	L(general)
   185  	mov	GPARAM_SIZE, %ebx	C size
   186  	mov	GPARAM_SRC2, up		C up = src2, the operand that gets scaled
   187  	jmp	L(inplace)		C continue in the in-place body
   188  
   189  L(general):
   190  	mov	GPARAM_SIZE, %eax	C size
   191  	mov	%ebx, SAVE_EBX		C NOTE(review): repeats the store made before the jne; ebx and the slot are unchanged in between
   192  	incl	%eax			C size + 1
   193  	mov	up, %ebx		C ebx = src1 (limbs used unscaled)
   194  	mov	GPARAM_SRC2, up		C up = src2 (limbs scaled by M)
   195  	shr	%eax			C (size+1)\2
   196  	mov	%ebp, SAVE_EBP		C offset-12 slot, i.e. the src2 slot that was read two lines up
   197  	mov	%eax, GPARAM_SIZE	C loop count lives in memory
   198  	jnc	L(entry2)		C size odd
   199  
   200  	add	%edx, %edx		C size even
   201  	mov	%ecx, %ebp		C ebp = carry-in word
   202  	mov	(up), %ecx		C ecx = first src2 limb
   203  	lea	-4(rp), rp		C bias rp so that 4(rp) below is dst[0]
   204  	lea	-4(%ebx), %ebx		C same bias for the src1 pointer
   205  	lea	(%ebp,%ecx,M), %eax	C eax = limb*M + carry-in word
   206  	lea	4(up), up
   207  	jmp	L(enteven2)		C start with the second half of the loop body
   208  
   209  	ALIGN(16)
   210  L(oop2):
   211  	lea	(%ecx,%eax,M), %ebp	C ebp = limb*M + bits left over in ecx
   212  	shr	$RSH, %eax		C eax = bits to carry into the next limb
   213  	mov	4(up), %ecx		C ecx = next src2 limb
   214  	add	%edx, %edx		C CF = bit 31 of edx (edx is 0 or -1 after the sbb below)
   215  	lea	8(up), up		C lea advances the pointer without touching CF
   216  	mov	(%ebx), %edx		C edx = src1 limb; mov keeps CF
   217  	M4_inst	%ebp, %edx
   218  	lea	(%eax,%ecx,M), %eax	C eax = limb*M + bits left over in eax
   219  	mov	%edx, (rp)
   220  L(enteven2):
   221  	mov	4(%ebx), %edx		C edx = next src1 limb
   222  	lea	8(%ebx), %ebx
   223  	M4_inst	%eax, %edx
   224  	mov	%edx, 4(rp)
   225  	sbb	%edx, %edx		C park CF: edx = 0 or -1
   226  	shr	$RSH, %ecx		C ecx = bits to carry into the next limb
   227  	lea	8(rp), rp
   228  L(entry2):
   229  	mov	(up), %eax		C eax = next (possibly last) src2 limb
   230  	decl	GPARAM_SIZE
   231  	jnz	L(oop2)
   232  
        C Last limb.
   233  	lea	(%ecx,%eax,M), %ebp
   234  	shr	$RSH, %eax		C eax = bits shifted out at the top
   235  	shr	%edx			C CF = bit 0 of edx, the parked carry
   236  	mov	(%ebx), %edx		C edx = last src1 limb; mov keeps CF
   237  	M4_inst	%ebp, %edx
   238  	mov	%edx, (rp)
   239  	mov	SAVE_UP, up		C the movs do not disturb the flags read by adc
   240  	adc	$0, %eax		C return value += final carry/borrow
   241  	mov	SAVE_EBP, %ebp
   242  	mov	SAVE_EBX, %ebx
   243  	pop	rp			FRAME_popl()
   244  	ret
   245  EPILOGUE()
   246  
   247  ASM_END()