github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/bobcat/aors_n.asm (about)

     1  dnl  AMD64 mpn_add_n, mpn_sub_n optimised for bobcat.
     2  
     3  dnl  Copyright 2003-2005, 2007, 2008, 2010-2013 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C	     cycles/limb
    34  C AMD K8,K9
    35  C AMD K10
    36  C AMD bd1
    37  C AMD bobcat	 2.28
    38  C Intel P4
    39  C Intel core2
    40  C Intel NHM
    41  C Intel SBR
    42  C Intel IBR
    43  C Intel atom
    44  C VIA nano
    45  
    46  C The loop of this code is the result of running a code generation and
    47  C optimization tool suite written by David Harvey and Torbjorn Granlund.
    48  
    49  C INPUT PARAMETERS
        C Each define binds a symbolic argument name to the register the code below
        C uses for it.  The register named in the trailing comment is presumably
        C where the same argument arrives under the DOS64 ABI -- TODO confirm.
    50  define(`rp',	`%rdi')	C rcx
    51  define(`up',	`%rsi')	C rdx
    52  define(`vp',	`%rdx')	C r8
    53  define(`n',	`%rcx')	C r9
    54  define(`cy',	`%r8')	C rsp+40    (mpn_add_nc and mpn_sub_nc)
    55  
        C Operation selection.  ADCSBB becomes adc or sbb, and func / func_nc become
        C the matching exported names, depending on which OPERATION_ symbol is
        C defined.  Neither symbol is set in this file; presumably the build
        C supplies exactly one of them -- confirm.
    56  ifdef(`OPERATION_add_n', `
    57  	define(ADCSBB,	      adc)
    58  	define(func,	      mpn_add_n)
    59  	define(func_nc,	      mpn_add_nc)')
    60  ifdef(`OPERATION_sub_n', `
    61  	define(ADCSBB,	      sbb)
    62  	define(func,	      mpn_sub_n)
    63  	define(func_nc,	      mpn_sub_nc)')
    64  
    65  MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)	C lists the four entry points this source can provide (macro defined elsewhere)
    66  
        C Both ABIs are declared as supported; the DOS64-specific handling visible
        C in this file is FUNC_ENTRY / FUNC_EXIT and the IFDOS line in func_nc.
    67  ABI_SUPPORT(DOS64)
    68  ABI_SUPPORT(STD64)
    69  
    70  ASM_START()
    71  	TEXT	C presumably switches to the code section -- macro defined elsewhere
    72  	ALIGN(16)	C alignment request for the func entry that follows
    73  PROLOGUE(func)
        C Entry point for the operation selected at assembly time (mpn_add_n or
        C mpn_sub_n, per the func define).  For i = 0 .. n-1 it computes
        C rp[i] = up[i] ADCSBB vp[i] on 8-byte limbs, chaining carry/borrow through
        C CF, and leaves the final carry/borrow (0 or 1) in %rax.
        C
        C Register roles in the body:
        C   %rax       limb index, used as (base,%rax,8)
        C   n (%rcx)   after the shift, counts passes through the 4-limb loop
        C   %r8-%r11   limbs loaded from up, combined with vp, then stored to rp
        C   CF         running carry/borrow.  After the neg that seeds it, only
        C              ADCSBB modifies it until the final adc (mov, lea, dec and
        C              jrcxz leave CF alone).
        C
        C The b00/b10/b01/b11 blocks dispatch on n mod 4 and enter the unrolled
        C loop at the matching label, with %rax chosen so that the first
        C displacement used addresses limb 0.
        C
        C FUNC_ENTRY and FUNC_EXIT are defined outside this file; presumably ABI
        C glue for the DOS64 case -- confirm.
        C NOTE(review): n = 0 takes the b00 path, which has no zero check, so
        C callers presumably guarantee n >= 1 -- confirm.
    74  	FUNC_ENTRY(4)
    75  	xor	%r8, %r8	C plain entry: carry-in cy = 0
    76  L(ent):	test	$1, R8(n)	C shared entry, func_nc jumps here with cy in %r8; test bit 0 of n
    77  	jnz	L(bx1)	C n is odd
    78    
    79  L(bx0):	test	$2, R8(n)	C n even: test bit 1
    80  	jnz	L(b10)
    81    
    82  L(b00):	shr	$2, n	C n mod 4 = 0: loop count = n/4
    83  	neg	%r8	C CF = 1 if cy is nonzero, else 0
    84  	mov	$3, R32(%rax)	C index 3: the -24 displacement at lo0 then hits limb 0
    85  	mov	(up), %r10	C up[0]
    86  	mov	8(up), %r11	C up[1]
    87  	jmp	L(lo0)
    88    
    89  L(b10):	shr	$2, n	C n mod 4 = 2: loop count = n/4
    90  	neg	%r8	C CF = 1 if cy is nonzero; %r8 is reused for data right below
    91  	mov	$1, R32(%rax)	C index 1: the -8 displacement at top/cj2 then hits limb 0
    92  	mov	(up), %r8	C up[0]
    93  	mov	8(up), %r9	C up[1]
    94  	jrcxz	L(cj2)	C count is 0 (n was 2): only the two-limb tail remains
    95  	jmp	L(top)
    96    
    97  L(bx1):	test	$2, R8(n)	C n odd: test bit 1
    98  	jnz	L(b11)
    99    
   100  L(b01):	shr	$2, n	C n mod 4 = 1: loop count = n/4
   101  	neg	%r8	C CF = 1 if cy is nonzero, else 0
   102  	mov	$0, R32(%rax)	C index 0; mov rather than xor so CF survives
   103  	mov	(up), %r9	C up[0]
   104  	jrcxz	L(cj1)	C count is 0 (n was 1): single-limb tail
   105  	mov	8(up), %r10	C up[1]
   106  	jmp	L(lo1)
   107    
   108  	ALIGN(8)
   109  L(b11):	inc	n	C n mod 4 = 3: round up so the shift yields (n+1)/4
   110  	shr	$2, n
   111  	neg	%r8	C CF = 1 if cy is nonzero, else 0
   112  	mov	$2, R32(%rax)	C index 2: the -16 displacement at lo3 then hits limb 0
   113  	mov	(up), %r11	C up[0]
   114  	jmp	L(lo3)
   115    
        C Main loop: four limbs per pass.  Loads from up run ahead of the ADCSBB
        C that consumes them, and each store follows its ADCSBB.
   116  	ALIGN(4)
   117  L(top):	mov	8(up,%rax,8), %r10	C load ahead: limb consumed at lo0
   118  	ADCSBB	-8(vp,%rax,8), %r8
   119  	mov	%r8, -8(rp,%rax,8)
   120  L(lo1):	mov	16(up,%rax,8), %r11	C load ahead: limb consumed at lo3
   121  	ADCSBB	(vp,%rax,8), %r9
   122  	lea	4(%rax), %rax	C advance index by 4 limbs without touching flags
   123  	mov	%r9, -32(rp,%rax,8)	C -32 compensates for the index bump just above
   124  L(lo0):	ADCSBB	-24(vp,%rax,8), %r10
   125  	mov	%r10, -24(rp,%rax,8)
   126  L(lo3):	ADCSBB	-16(vp,%rax,8), %r11
   127  	dec	n	C sets ZF for the jnz below; CF is left intact
   128  	mov	-8(up,%rax,8), %r8	C load ahead: limb consumed at top or cj2
   129  	mov	%r11, -16(rp,%rax,8)
   130  L(lo2):	mov	(up,%rax,8), %r9	C load ahead: limb consumed at lo1 or cj1
   131  	jnz	L(top)	C loop while the pass count from dec is nonzero
   132    
        C Tail: the last two limbs (entered at cj1 when only one limb remains).
   133  L(cj2):	ADCSBB	-8(vp,%rax,8), %r8
   134  	mov	%r8, -8(rp,%rax,8)
   135  L(cj1):	ADCSBB	(vp,%rax,8), %r9
   136  	mov	%r9, (rp,%rax,8)
   137    
   138  	mov	$0, R32(%rax)	C mov keeps CF intact (xor would clear it)
   139  	adc	$0, R32(%rax)	C return value = final carry/borrow
   140    
   141  	FUNC_EXIT()
   142  	ret
   143  EPILOGUE()
   144  
   145  	ALIGN(16)
   146  PROLOGUE(func_nc)
        C Carry-in variant (mpn_add_nc or mpn_sub_nc, per the func_nc define).
        C It takes a fifth argument cy and shares the body of func by jumping to
        C the ent label, which sits just past the instruction that zeroes %r8 for
        C the plain entry.  Any nonzero cy acts as a carry-in of 1, because the
        C shared body converts it to CF with neg.
   147  	FUNC_ENTRY(4)
   148  IFDOS(`	mov	56(%rsp), %r8	')	C DOS64 only: fetch cy from the stack.  The parameter table lists rsp+40; the extra 16 is presumably pushed by FUNC_ENTRY -- TODO confirm
   149  	jmp	L(ent)	C continue in the shared body with cy in %r8
   150  EPILOGUE()