github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/rsh1aors_n.asm (about)

     1  dnl  AMD64 mpn_rsh1add_n -- rp[] = (up[] + vp[]) >> 1
     2  dnl  AMD64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1
     3  
     4  dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  include(`../config.m4')
    33  
    34  C	     cycles/limb
    35  C AMD K8,K9	 2.14	(mpn_add_n + mpn_rshift need 4.125)
    36  C AMD K10	 2.14	(mpn_add_n + mpn_rshift need 4.125)
    37  C Intel P4	12.75
    38  C Intel core2	 3.75
    39  C Intel NMH	 4.4
    40  C Intel SBR	 ?
    41  C Intel atom	 ?
    42  C VIA nano	 3.25
    43  
    44  C TODO
    45  C  * Rewrite to use indexed addressing, like addlsh1.asm and sublsh1.asm.
    46  
    47  C INPUT PARAMETERS
C  Register assignments follow the SysV AMD64 argument order
C  (DOS64 entry is fixed up by FUNC_ENTRY/IFDOS below).
     48  define(`rp', `%rdi')
     49  define(`up', `%rsi')
     50  define(`vp', `%rdx')
     51  define(`n',`  %rcx')
     52  
C  One source file yields both operations at m4 time: ADDSUB/ADCSBB expand
C  to add/adc or sub/sbb, producing mpn_rsh1add_n or mpn_rsh1sub_n (and the
C  corresponding carry-in _nc entry points).
     53  ifdef(`OPERATION_rsh1add_n', `
     54  	define(ADDSUB,	      add)
     55  	define(ADCSBB,	      adc)
     56  	define(func_n,	      mpn_rsh1add_n)
     57  	define(func_nc,	      mpn_rsh1add_nc)')
     58  ifdef(`OPERATION_rsh1sub_n', `
     59  	define(ADDSUB,	      sub)
     60  	define(ADCSBB,	      sbb)
     61  	define(func_n,	      mpn_rsh1sub_n)
     62  	define(func_nc,	      mpn_rsh1sub_nc)')
     63  
     64  MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
     65  
     66  ABI_SUPPORT(DOS64)
     67  ABI_SUPPORT(STD64)
    68  
    69  ASM_START()
    70  	TEXT
    71  	ALIGN(16)
     72  PROLOGUE(func_nc)
C  func_nc(rp, up, vp, n, cy): same as func_n but with an explicit
C  carry/borrow-in cy (0 or 1) in %r8 (5th argument).  Seeds CF from cy,
C  computes limb 0, then joins the common path at L(ent) in func_n below.
C  NOTE(review): relies on falling into the adjacent func_n body — the two
C  routines must stay in the same translation unit, in this order.
     73  	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8	')	C DOS64: 5th arg is on the stack
     75  	push	%rbx			C callee-saved; holds the pending limb
     76  
     77  	xor	R32(%rax), R32(%rax)	C rax = 0 (return value); also clears CF
     78  	neg	%r8			C set C flag from parameter
     79  	mov	(up), %rbx
     80  	ADCSBB	(vp), %rbx		C limb 0 with incoming carry/borrow
     81  	jmp	L(ent)
     82  EPILOGUE()
    83  
    84  	ALIGN(16)
     85  PROLOGUE(func_n)
C  func_n(rp, up, vp, n): rp[] = (up[] ADDSUB vp[]) >> 1, n >= 1 limbs;
C  returns (in rax) the bit shifted out at the low end (0 or 1).
C
C  Technique: after each limb's add/sub, `rcr` rotates the limb right by
C  one through CF — this simultaneously saves the inter-limb carry/borrow
C  into the limb's top bit and performs the right shift.  Before the next
C  ADCSBB chain, `add %rbx,%rbx` re-exposes that saved carry in CF.
C  n mod 4 limbs are peeled off first so the main loop is a clean 4-way
C  unroll.  `lea` is used for pointer updates because it preserves flags.
     86  	FUNC_ENTRY(4)
     87  	push	%rbx			C callee-saved; holds the limb pending store
     88  
     89  	xor	R32(%rax), R32(%rax)	C rax = 0 (return value); also clears CF
     90  	mov	(up), %rbx
     91  	ADDSUB	(vp), %rbx		C limb 0; carry/borrow left in CF
     92  L(ent):
C  rcr moves CF into bit 63 of rbx and rbx's old bit 0 into CF; the adc
C  then captures that shifted-out low bit as the return value.
     93  	rcr	%rbx			C rotate, save acy
     94  	adc	R32(%rax), R32(%rax)	C return value
     95  
     96  	mov	R32(n), R32(%r11)
     97  	and	$3, R32(%r11)		C r11 = n mod 4, selects peel case
     98  
     99  	cmp	$1, R32(%r11)
    100  	je	L(do)			C jump if n = 1 5 9 ...

C  Peel one extra limb (n mod 4 == 2).
    102  L(n1):	cmp	$2, R32(%r11)
    103  	jne	L(n2)			C jump unless n = 2 6 10 ...
    104  	add	%rbx, %rbx		C rotate carry limb, restore acy
    105  	mov	8(up), %r10
    106  	ADCSBB	8(vp), %r10
    107  	lea	8(up), up
    108  	lea	8(vp), vp
    109  	lea	8(rp), rp
    110  	rcr	%r10			C save acy, shift limb right
    111  	rcr	%rbx			C pull r10's low bit into rbx's top
    112  	mov	%rbx, -8(rp)
    113  	jmp	L(cj1)

C  Peel two extra limbs (n mod 4 == 3).
    115  L(n2):	cmp	$3, R32(%r11)
    116  	jne	L(n3)			C jump unless n = 3 7 11 ...
    117  	add	%rbx, %rbx		C rotate carry limb, restore acy
    118  	mov	8(up), %r9
    119  	mov	16(up), %r10
    120  	ADCSBB	8(vp), %r9
    121  	ADCSBB	16(vp), %r10
    122  	lea	16(up), up
    123  	lea	16(vp), vp
    124  	lea	16(rp), rp
    125  	rcr	%r10			C save acy, then shift the chain
    126  	rcr	%r9
    127  	rcr	%rbx
    128  	mov	%rbx, -16(rp)
    129  	jmp	L(cj2)

C  Peel three extra limbs (n mod 4 == 0); dec n so shr $2 below yields
C  the remaining iteration count.
    131  L(n3):	dec	n			C come here for n = 4 8 12 ...
    132  	add	%rbx, %rbx		C rotate carry limb, restore acy
    133  	mov	8(up), %r8
    134  	mov	16(up), %r9
    135  	ADCSBB	8(vp), %r8
    136  	ADCSBB	16(vp), %r9
    137  	mov	24(up), %r10
    138  	ADCSBB	24(vp), %r10
    139  	lea	24(up), up
    140  	lea	24(vp), vp
    141  	lea	24(rp), rp
    142  	rcr	%r10			C save acy, then shift the chain
    143  	rcr	%r9
    144  	rcr	%r8
    145  	rcr	%rbx
    146  	mov	%rbx, -24(rp)
    147  	mov	%r8, -16(rp)
    148  L(cj2):	mov	%r9, -8(rp)
    149  L(cj1):	mov	%r10, %rbx		C newest limb becomes the pending limb

    151  L(do):
    152  	shr	$2, n			C				4
    153  	je	L(end)			C				2
    154  	ALIGN(16)
C  Main loop: 4 limbs per iteration.  rbx enters holding the previous
C  limb already shifted, with the saved carry in its top bit.
    155  L(top):	add	%rbx, %rbx		C rotate carry limb, restore acy

    157  	mov	8(up), %r8
    158  	mov	16(up), %r9
    159  	ADCSBB	8(vp), %r8
    160  	ADCSBB	16(vp), %r9
    161  	mov	24(up), %r10
    162  	mov	32(up), %r11
    163  	ADCSBB	24(vp), %r10
    164  	ADCSBB	32(vp), %r11

    166  	lea	32(up), up
    167  	lea	32(vp), vp

C  Shift the 4-limb window right by one, top limb first; each rcr feeds
C  its low bit into the next limb's top via CF.
    169  	rcr	%r11			C rotate, save acy
    170  	rcr	%r10
    171  	rcr	%r9
    172  	rcr	%r8

    174  	rcr	%rbx
    175  	mov	%rbx, (rp)
    176  	mov	%r8, 8(rp)
    177  	mov	%r9, 16(rp)
    178  	mov	%r10, 24(rp)
    179  	mov	%r11, %rbx		C top limb becomes next pending limb

    181  	lea	32(rp), rp
    182  	dec	n
    183  	jne	L(top)

C  Store the last pending limb; its top bit already holds the final
C  carry/borrow placed there by the last rcr.
    185  L(end):	mov	%rbx, (rp)
    186  	pop	%rbx
    187  	FUNC_EXIT()
    188  	ret
    189  EPILOGUE()