github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/atom/aorrlshC_n.asm (about)

     1  dnl  Intel Atom mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[]
     2  
     3  dnl  Contributed to the GNU project by Marco Bodrato.
     4  
     5  dnl  Copyright 2011 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    36  C                          mp_size_t size);
    37  C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    38  C                           mp_size_t size, mp_limb_t carry);
    39  C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    40  C                          mp_size_t size);
    41  C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
    42  C                           mp_size_t size, mp_signed_limb_t carry);
    43  
    44  C				cycles/limb
    45  C P5
    46  C P6 model 0-8,10-12
    47  C P6 model 9  (Banias)
    48  C P6 model 13 (Dothan)
    49  C P4 model 0  (Willamette)
    50  C P4 model 1  (?)
    51  C P4 model 2  (Northwood)
    52  C P4 model 3  (Prescott)
    53  C P4 model 4  (Nocona)
    54  C Intel Atom			 6
    55  C AMD K6
    56  C AMD K7
    57  C AMD K8
    58  C AMD K10
    59  
    60  defframe(PARAM_CORB,	20)	C 5th stack argument (carry), read only by the M4_function_c entry
    61  defframe(PARAM_SIZE,	16)	C 4th stack argument: size
    62  defframe(PARAM_DBLD,	12)	C 3rd stack argument: src2, loaded into vp and scaled by M
    63  defframe(PARAM_SRC,	 8)	C 2nd stack argument: src1, loaded into up
    64  defframe(PARAM_DST,	 4)	C 1st stack argument: dst, loaded into rp
    65  
    66  dnl  re-use parameter space: each slot is overwritten only after its argument has been loaded
    67  define(VAR_COUNT,`PARAM_SIZE')	C loop counter, initialised to (size+1)/2
    68  define(SAVE_EBP,`PARAM_DBLD')	C holds the incoming ebp while ebp is used as scratch
    69  define(SAVE_VP,`PARAM_SRC')	C holds the incoming ebx while ebx is used as vp
    70  define(SAVE_UP,`PARAM_DST')	C holds the incoming esi while esi is used as up
    71  
    72  define(M, eval(m4_lshift(1,LSH)))	C lea scale factor; LSH is not defined in this file, presumably M = 1 << LSH - confirm
    73  define(`rp',  `%edi')	C destination pointer
    74  define(`up',  `%esi')	C pointer to the operand fed to M4_inst unshifted
    75  define(`vp',  `%ebx')	C pointer to the operand that is shifted before use
    76  
    77  ASM_START()
    78  	TEXT
    79  	ALIGN(8)
    80  
    81  PROLOGUE(M4_function_c)	C carry-in entry: sets up eax/edx from the carry argument, then joins M4_function
    82  deflit(`FRAME',0)
    83  	movl	PARAM_CORB, %eax	C eax = carry argument
    84  	movl	%eax, %edx
    85  	shr	$LSH, %edx
    86  	andl	$1, %edx	C edx = bit LSH of the carry argument; later consumed as the initial carry flag
    87  	M4_opp	%edx, %eax	C M4_opp is defined outside this file; presumably the inverse of M4_inst, removing that bit from eax - confirm
    88  	jmp	L(start_nc)	C enter the shared body, skipping the zeroing of eax and edx
    89  EPILOGUE()
    90  
    91  PROLOGUE(M4_function)	C rp[] = (vp[] << LSH) M4_inst up[], two limbs per loop pass; result of the last M4_inst left in eax
    92  deflit(`FRAME',0)
    93  
    94  	xor	%eax, %eax	C no carry-in: nothing shifted in below the first limb
    95  	xor	%edx, %edx	C bit 0 of edx is the saved carry flag, initially clear
    96  L(start_nc):	C shared body; the carry-in entry arrives here with eax and edx preset
    97  	push	rp			FRAME_pushl()	C save caller edi; FRAME_pushl presumably keeps PARAM_* offsets valid - confirm
    98  
    99  	mov	PARAM_SIZE, %ecx	C size
   100  	mov	PARAM_DST, rp	C must be read before the next line overwrites this slot
   101  	mov	up, SAVE_UP	C save caller esi in the dst argument slot
   102  	incl	%ecx			C size + 1
   103  	mov	PARAM_SRC, up
   104  	mov	vp, SAVE_VP	C save caller ebx in the src argument slot
   105  	shr	%ecx			C (size+1)\2, CF = low bit of size+1
   106  	mov	PARAM_DBLD, vp
   107  	mov	%ebp, SAVE_EBP	C save caller ebp in the src2 argument slot
   108  	mov	%ecx, VAR_COUNT	C the mov instructions since the shr leave CF untouched for the jnc
   109  	jnc	L(entry)		C size odd
   110  
   111  	shr	%edx			C size even; moves the saved carry (edx bit 0) into CF, edx is overwritten below
   112  	mov	(vp), %ecx	C ecx = vp[0]
   113  	lea	4(vp), vp	C lea keeps CF intact for the M4_inst at L(enteven)
   114  	lea	(%eax,%ecx,M), %edx	C edx = vp[0]*M + bits carried in through eax
   115  	mov	%ecx, %eax	C keep the unshifted limb; its high bits are extracted after L(enteven)
   116  	lea	-4(up), up	C bias up and rp by one limb so that 4(up) and 4(rp)
   117  	lea	-4(rp), rp	C at L(enteven) address element 0
   118  	jmp	L(enteven)
   119  
   120  	ALIGN(16)
   121  L(oop):	C each pass handles two limbs; eax = bits shifted out of the previous limb, ecx = current limb
   122  	lea	(%eax,%ecx,M), %ebp	C ebp = first limb shifted left, with the previous limb bits below it
   123  	shr	$RSH, %ecx	C ecx = bits shifted out of the first limb (RSH is not defined in this file, presumably 32-LSH - confirm)
   124  	mov	4(vp), %eax	C eax = second limb
   125  	shr	%edx	C restore CF from bit 0 of edx
   126  	lea	8(vp), vp
   127  	M4_inst	(up), %ebp	C M4_inst is defined outside this file; presumably a carry-consuming add or subtract - confirm
   128  	lea	(%ecx,%eax,M), %edx	C edx = second limb shifted left, with the first limb bits below it
   129  	mov	%ebp, (rp)	C store first result limb; mov leaves CF intact
   130  L(enteven):	C entry for even size: edx = shifted limb, eax = the same limb unshifted, CF = incoming carry
   131  	M4_inst	4(up), %edx
   132  	lea	8(up), up
   133  	mov	%edx, 4(rp)	C store second result limb
   134  	adc	%edx, %edx	C park CF in bit 0 of edx, since the following shr overwrites the flags
   135  	shr	$RSH, %eax	C eax = bits shifted out of the second limb
   136  	lea	8(rp), rp
   137  L(entry):	C entry for odd size: eax and edx as set at function entry
   138  	mov	(vp), %ecx	C ecx = next limb of vp
   139  	decl	VAR_COUNT
   140  	jnz	L(oop)	C when the count reaches zero, ecx holds the final limb and falls through
   141  
   142  	lea	(%eax,%ecx,M), %ebp	C final limb shifted left, with the previous limb bits below it
   143  	shr	$RSH, %ecx	C ecx = bits shifted out of the final limb
   144  	shr	%edx	C restore CF from bit 0 of edx
   145  	mov	SAVE_VP, vp	C restore caller ebx; mov leaves CF intact
   146  	M4_inst	(up), %ebp
   147  	mov	%ecx, %eax	C eax = bits shifted out of the top limb
   148  	mov	SAVE_UP, up	C restore caller esi
   149  	M4_inst	$0, %eax	C fold the CF left by the previous M4_inst into eax, which is still in eax at ret
   150  	mov	%ebp, (rp)	C store final result limb
   151  	mov	SAVE_EBP, %ebp	C restore caller ebp
   152  	pop	rp			FRAME_popl()	C restore caller edi
   153  	ret
   154  EPILOGUE()
   155  
   156  ASM_END()