github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/aors_n.asm (about)

     1  dnl  AMD64 mpn_add_n, mpn_sub_n
     2  
     3  dnl  Copyright 2003-2005, 2007, 2008, 2010-2012 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C	     cycles/limb
    34  C AMD K8,K9	 1.5
    35  C AMD K10	 1.5
    36  C AMD bd1	 1.8
    37  C AMD bobcat	 2.5
    38  C Intel P4
    39  C Intel core2	 4.9
    40  C Intel NHM	 5.5
    41  C Intel SBR	 1.61
    42  C Intel IBR	 1.61
    43  C Intel atom	 4
    44  C VIA nano	 3.25
    45  
    46  C The loop of this code is the result of running a code generation and
    47  C optimization tool suite written by David Harvey and Torbjorn Granlund.
    48  
    49  C INPUT PARAMETERS
        C m4 names for the argument registers used by the code in this file.  Each
        C define maps the quoted name to the register given as its second argument.
        C The trailing C column (rcx, rdx, r8, r9, rsp+40) presumably lists where the
        C same argument lives under the DOS64 ABI -- TODO confirm against FUNC_ENTRY.
        C NOTE(review): the code visible in this file reads the carry argument as
        C %r8 directly and never expands the cy name.
    50  define(`rp',	`%rdi')	C rcx
    51  define(`up',	`%rsi')	C rdx
    52  define(`vp',	`%rdx')	C r8
    53  define(`n',	`%rcx')	C r9
    54  define(`cy',	`%r8')	C rsp+40    (mpn_add_nc and mpn_sub_nc)
    55  
    56  ifdef(`OPERATION_add_n', `
    57  	define(ADCSBB,	      adc)
    58  	define(func,	      mpn_add_n)
    59  	define(func_nc,	      mpn_add_nc)')
        C The ifdef above and the ifdef below choose which flavour this source
        C assembles to.  With OPERATION_add_n defined, ADCSBB becomes adc and the two
        C entry points are named mpn_add_n and mpn_add_nc; with OPERATION_sub_n
        C defined, ADCSBB becomes sbb and the entry points are mpn_sub_n and
        C mpn_sub_nc.  There is no fallback branch: if neither symbol is defined,
        C ADCSBB, func and func_nc stay undefined.  The OPERATION symbols are not set
        C anywhere in this file; presumably the build supplies exactly one of them.
    60  ifdef(`OPERATION_sub_n', `
    61  	define(ADCSBB,	      sbb)
    62  	define(func,	      mpn_sub_n)
    63  	define(func_nc,	      mpn_sub_nc)')
    64  
    65  MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
        C The list above names the four entry points this single source can provide,
        C two per OPERATION setting.  MULFUNC_PROLOGUE is defined outside this file;
        C presumably the build machinery scans for it -- TODO confirm.
    66  
        C ABI_SUPPORT is also defined outside this file.  The two lines below declare
        C DOS64 and STD64; presumably that marks the code as usable under both
        C calling conventions, which matches the IFDOS stack load in func_nc.
    67  ABI_SUPPORT(DOS64)
    68  ABI_SUPPORT(STD64)
    69  
    70  ASM_START()
    71  	TEXT
    72  	ALIGN(16)
        C func_nc: variant of func that takes an incoming carry (or borrow) as a
        C fifth argument.  It only sets up the carry flag and the limb counters and
        C then jumps into the body of func, so both exits of this entry are jumps
        C and control never falls through to EPILOGUE.
        C
        C   In:   rp, up, vp, n as for func; carry argument in %r8, of which only
        C         bit 0 is examined.
        C   Out:  same as func (see the labels it jumps to).
        C
        C FUNC_ENTRY and IFDOS are defined outside this file.  Presumably
        C FUNC_ENTRY(4) moves the four register arguments into place for DOS64 and
        C IFDOS emits its argument only for DOS64 -- TODO confirm.
    73  PROLOGUE(func_nc)
    74  	FUNC_ENTRY(4)
        C The fifth argument is fetched from the stack here; the define of cy gives
        C rsp+40 while this load uses 56, presumably because FUNC_ENTRY has pushed
        C 16 bytes by this point -- TODO confirm.
    75  IFDOS(`	mov	56(%rsp), %r8	')
    76  	mov	R32(n), R32(%rax)	C eax = low 32 bits of the limb count
    77  	shr	$2, n	C n = number of complete 4-limb groups
    78  	and	$3, R32(%rax)	C eax = limbs left over after the groups, 0..3
    79  	bt	$0, %r8			C cy flag <- carry parameter
    80  	jrcxz	L(lt4)	C no complete group: tail code; jrcxz leaves CF alone
    81  
        C At least one complete group.  %r8 is reused as a data register from here
        C on, which is safe because the carry argument already sits in CF.
    82  	mov	(up), %r8	C preload the first two limbs of up
    83  	mov	8(up), %r9
    84  	dec	n	C sets ZF for the jnz after L(mid); dec does not modify CF
    85  	jmp	L(mid)	C join the unrolled loop halfway, inside func
    86  
    87  EPILOGUE()
    88  	ALIGN(16)
        C func: limb-wise add (ADCSBB = adc) or subtract (ADCSBB = sbb) with carry
        C propagation.  Each limb is loaded from up, combined with the matching limb
        C of vp by ADCSBB, and stored to rp.
        C
        C   In:   rp = destination, up = first source, vp = second source,
        C         n = limb count.
        C   Out:  eax = final carry or borrow, 0 or 1.
        C   Uses: rax, r8, r9, r10, r11 and flags; rp, up, vp and n are modified.
        C
        C Layout: n is split into n >> 2 groups of four limbs, handled by the
        C unrolled loop L(top) / L(mid) / L(end), and n & 3 leftover limbs, handled
        C by L(lt4) / L(2) / L(3).  The running carry lives in CF the whole time, so
        C between two ADCSBB instructions only instructions that leave CF untouched
        C are used: mov, lea, dec, inc, jnz and jrcxz.
        C
        C func_nc jumps to L(lt4) and L(mid) with CF already holding its carry
        C argument.
        C
        C NOTE(review): with n = 0 both dec instructions in the tail code give a
        C non-zero result, so the three-limb path L(3) runs.  Presumably callers
        C never pass n = 0 -- confirm.
    89  PROLOGUE(func)
    90  	FUNC_ENTRY(4)
    91  	mov	R32(n), R32(%rax)	C eax = low 32 bits of the limb count
    92  	shr	$2, n	C n = number of complete 4-limb groups
    93  	and	$3, R32(%rax)	C eax = leftover limbs, 0..3; and also clears CF
    94  	jrcxz	L(lt4)	C no complete group: go straight to the tail code
    95  
    96  	mov	(up), %r8	C preload the first two limbs of up
    97  	mov	8(up), %r9
    98  	dec	n	C sets ZF for the jnz after L(mid); dec does not modify CF
    99  	jmp	L(mid)
   100  
        C Tail code for 1 to 3 limbs.  On entry eax holds the leftover count and CF
        C the carry so far.  Each dec of eax that reaches zero selects the matching
        C case while leaving CF intact.
   101  L(lt4):	dec	R32(%rax)	C zero if exactly one limb is left
   102  	mov	(up), %r8
   103  	jnz	L(2)
   104  	ADCSBB	(vp), %r8
   105  	mov	%r8, (rp)
   106  	adc	R32(%rax), R32(%rax)	C eax is 0 here, so eax = CF
   107  	FUNC_EXIT()
   108  	ret
   109  
   110  L(2):	dec	R32(%rax)	C zero if exactly two limbs are left
   111  	mov	8(up), %r9
   112  	jnz	L(3)
   113  	ADCSBB	(vp), %r8
   114  	ADCSBB	8(vp), %r9
   115  	mov	%r8, (rp)
   116  	mov	%r9, 8(rp)
   117  	adc	R32(%rax), R32(%rax)	C eax is 0 here, so eax = CF
   118  	FUNC_EXIT()
   119  	ret
   120  
        C Three limbs left.  For a leftover count of 3, eax is 1 at this point, so
        C the result is written with setc into the low byte instead of the adc
        C used by the other exits.
   121  L(3):	mov	16(up), %r10
   122  	ADCSBB	(vp), %r8
   123  	ADCSBB	8(vp), %r9
   124  	ADCSBB	16(vp), %r10
   125  	mov	%r8, (rp)
   126  	mov	%r9, 8(rp)
   127  	mov	%r10, 16(rp)
   128  	setc	R8(%rax)	C low byte of eax = CF
   129  	FUNC_EXIT()
   130  	ret
   131  
        C Main loop, four limbs per iteration.  r8..r11 hold the limbs of up for the
        C current group; r8 and r9 for the next group are reloaded before L(mid).
   132  	ALIGN(16)
   133  L(top):	ADCSBB	(vp), %r8
   134  	ADCSBB	8(vp), %r9
   135  	ADCSBB	16(vp), %r10
   136  	ADCSBB	24(vp), %r11
   137  	mov	%r8, (rp)
   138  	lea	32(up), up	C advance by four limbs; lea does not modify flags
   139  	mov	%r9, 8(rp)
   140  	mov	%r10, 16(rp)
   141  	dec	n	C one group fewer; sets ZF for the jnz below, CF unchanged
   142  	mov	%r11, 24(rp)
   143  	lea	32(vp), vp
   144  	mov	(up), %r8	C first two limbs of the next group
   145  	mov	8(up), %r9
   146  	lea	32(rp), rp
        C Loop entry point used by both prologues: r8 and r9 are already loaded and
        C ZF comes from the most recent dec of n.
   147  L(mid):	mov	16(up), %r10
   148  	mov	24(up), %r11
   149  	jnz	L(top)	C taken while groups remain after the one now loaded
   150  
        C Last complete group.  The pointers are advanced afterwards so that the
        C tail code can address any leftover limbs from offset 0.
   151  L(end):	lea	32(up), up
   152  	ADCSBB	(vp), %r8
   153  	ADCSBB	8(vp), %r9
   154  	ADCSBB	16(vp), %r10
   155  	ADCSBB	24(vp), %r11
   156  	lea	32(vp), vp
   157  	mov	%r8, (rp)
   158  	mov	%r9, 8(rp)
   159  	mov	%r10, 16(rp)
   160  	mov	%r11, 24(rp)
   161  	lea	32(rp), rp
   162  
        C The inc and dec pair tests eax for zero without disturbing CF.
   163  	inc	R32(%rax)
   164  	dec	R32(%rax)
   165  	jnz	L(lt4)	C leftover limbs remain: finish them in the tail code
   166  	adc	R32(%rax), R32(%rax)	C eax is 0 here, so eax = CF
   167  	FUNC_EXIT()
   168  	ret
   169  EPILOGUE()