github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/aorrlshC_n.asm (about)

     1  dnl  AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
     2  dnl  AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
     3  
     4  dnl  Copyright 2009-2012 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  
    33  C	     cycles/limb
    34  C AMD K8,K9	 2
    35  C AMD K10	 2
    36  C Intel P4	 ?
    37  C Intel core2	 3
    38  C Intel NHM	 2.75
    39  C Intel SBR	 2.55
    40  C Intel atom	 ?
    41  C VIA nano	 ?
    42  
    43  C INPUT PARAMETERS
    44  define(`rp',	`%rdi')		C destination limb pointer
    45  define(`up',	`%rsi')		C source operand used unshifted
    46  define(`vp',	`%rdx')		C source operand that is shifted left
    47  define(`n',	`%rcx')		C limb count, shared by all three operands
    48  
    49  define(M, eval(m4_lshift(1,LSH)))	C M = 1 << LSH, used as the lea index scale; LSH is not set in this file (presumably by the including file)
    50  
    51  ABI_SUPPORT(DOS64)
    52  ABI_SUPPORT(STD64)
    53  
    54  ASM_START()
    55  	TEXT
    56  	ALIGN(16)
    57  PROLOGUE(func)			C rp[] = (vp[] << LSH) ADDSUB up[] over n limbs; carry-out limb returned in rax
    58  	FUNC_ENTRY(4)
    59  	push	%r12			C r12-r15 hold the shifted limbs below; restored before ret
    60  	push	%r13
    61  	push	%r14
    62  	push	%r15
    63  
    64  	mov	(vp), %r8		C r8 = vp[0]
    65  	lea	(,%r8,M), %r12		C r12 = vp[0] << LSH
    66  	shr	$RSH, %r8		C r8 = vp[0] >> RSH, the bits carried into limb 1 (RSH is set outside this file, presumably 64-LSH)
    67  
    68  	mov	R32(n), R32(%rax)
    69  	lea	(rp,n,8), rp		C point rp, up and vp just past their last limb ...
    70  	lea	(up,n,8), up
    71  	lea	(vp,n,8), vp
    72  	neg	n			C ... and index them with n counting from -n up to 0
    73  	and	$3, R32(%rax)		C rax = n mod 4; 32-bit mask so no bits of n remain in eax when L(b00) reaches the carry restore
    74  	je	L(b00)
    75  	cmp	$2, R8(%rax)
    76  	jc	L(b01)			C n mod 4 = 1
    77  	je	L(b10)			C n mod 4 = 2
    78  
    79  L(b11):	mov	8(vp,n,8), %r10		C n mod 4 = 3: handle three limbs before the 4-way loop
    80  	lea	(%r8,%r10,M), %r14
    81  	shr	$RSH, %r10
    82  	mov	16(vp,n,8), %r11
    83  	lea	(%r10,%r11,M), %r15
    84  	shr	$RSH, %r11		C r11 = bits carried into the next limb
    85  	ADDSUB	(up,n,8), %r12		C lowest limb: no incoming carry
    86  	ADCSBB	8(up,n,8), %r14
    87  	ADCSBB	16(up,n,8), %r15
    88  	sbb	R32(%rax), R32(%rax)		  C save carry for next
    89  	mov	%r12, (rp,n,8)
    90  	mov	%r14, 8(rp,n,8)
    91  	mov	%r15, 16(rp,n,8)
    92  	add	$3, n
    93  	js	L(top)			C limbs remain while n is still negative
    94  	jmp	L(end)
    95  
    96  L(b01):	mov	%r8, %r11		C n mod 4 = 1: one limb; the loop and L(end) expect the carried bits in r11
    97  	ADDSUB	(up,n,8), %r12
    98  	sbb	R32(%rax), R32(%rax)		  C save carry for next
    99  	mov	%r12, (rp,n,8)
   100  	add	$1, n
   101  	js	L(top)
   102  	jmp	L(end)
   103  
   104  L(b10):	mov	8(vp,n,8), %r11		C n mod 4 = 2: handle two limbs before the 4-way loop
   105  	lea	(%r8,%r11,M), %r15
   106  	shr	$RSH, %r11
   107  	ADDSUB	(up,n,8), %r12
   108  	ADCSBB	8(up,n,8), %r15
   109  	sbb	R32(%rax), R32(%rax)		  C save carry for next
   110  	mov	%r12, (rp,n,8)
   111  	mov	%r15, 8(rp,n,8)
   112  	add	$2, n
   113  	js	L(top)
   114  	jmp	L(end)
   115  
   116  L(b00):	mov	8(vp,n,8), %r9		C n mod 4 = 0: r12 and r8 already hold limb 0, so join the loop body
   117  	mov	16(vp,n,8), %r10
   118  	jmp	L(e00)			C rax is 0 here, so the carry restore at L(e00) yields CF = 0
   119  
   120  	ALIGN(16)
   121  L(top):	mov	16(vp,n,8), %r10	C main loop: four limbs per iteration
   122  	mov	(vp,n,8), %r8
   123  	mov	8(vp,n,8), %r9
   124  	lea	(%r11,%r8,M), %r12	C r11 = bits carried over from the previous limb
   125  	shr	$RSH, %r8
   126  L(e00):	lea	(%r8,%r9,M), %r13
   127  	shr	$RSH, %r9
   128  	mov	24(vp,n,8), %r11
   129  	lea	(%r9,%r10,M), %r14
   130  	shr	$RSH, %r10
   131  	lea	(%r10,%r11,M), %r15
   132  	shr	$RSH, %r11		C r11 = bits carried into the next iteration or the return value
   133  	add	R32(%rax), R32(%rax)		  C restore carry
   134  	ADCSBB	(up,n,8), %r12
   135  	ADCSBB	8(up,n,8), %r13
   136  	ADCSBB	16(up,n,8), %r14
   137  	ADCSBB	24(up,n,8), %r15
   138  	mov	%r12, (rp,n,8)
   139  	mov	%r13, 8(rp,n,8)
   140  	mov	%r14, 16(rp,n,8)
   141  	sbb	R32(%rax), R32(%rax)		  C save carry for next
   142  	mov	%r15, 24(rp,n,8)
   143  	add	$4, n
   144  	js	L(top)
   145  L(end):				C here r11 = bits shifted out of the top limb and eax = 0 or -1, the saved carry
   146  
   147  ifelse(ADDSUB,add,`
   148  	sub	R32(%r11), R32(%rax)	C eax = -carry - r11
   149  	neg	R32(%rax)		C eax = r11 + carry
   150  ',`
   151  	add	R32(%r11), R32(%rax)	C eax = r11 - borrow
   152  	movslq	R32(%rax), %rax		C result may be -1, so sign-extend it to 64 bits
   153  ')
   154  	pop	%r15
   155  	pop	%r14
   156  	pop	%r13
   157  	pop	%r12
   158  	FUNC_EXIT()
   159  	ret
   160  EPILOGUE()