github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/atom/aorrlsh2_n.asm (about)

     1  dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
     2  dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
     3  dnl  Optimised for Intel Atom.
     4  
     5  dnl  Contributed to the GNU project by Torbjorn Granlund.
     6  
     7  dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
     8  
     9  dnl  This file is part of the GNU MP Library.
    10  dnl
    11  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    12  dnl  it under the terms of either:
    13  dnl
    14  dnl    * the GNU Lesser General Public License as published by the Free
    15  dnl      Software Foundation; either version 3 of the License, or (at your
    16  dnl      option) any later version.
    17  dnl
    18  dnl  or
    19  dnl
    20  dnl    * the GNU General Public License as published by the Free Software
    21  dnl      Foundation; either version 2 of the License, or (at your option) any
    22  dnl      later version.
    23  dnl
    24  dnl  or both in parallel, as here.
    25  dnl
    26  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    27  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    28  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    29  dnl  for more details.
    30  dnl
    31  dnl  You should have received copies of the GNU General Public License and the
    32  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    33  dnl  see https://www.gnu.org/licenses/.
    34  
    35  include(`../config.m4')
    36  
    37  C	     cycles/limb
    38  C AMD K8,K9	 ?
    39  C AMD K10	 ?
    40  C Intel P4	 ?
    41  C Intel core2	 ?
    42  C Intel NHM	 ?
    43  C Intel SBR	 ?
    44  C Intel atom	 5.75
    45  C VIA nano	 ?
    46  
    47  C INPUT PARAMETERS
    48  define(`rp',       `%rdi')	C destination limb pointer
    49  define(`up',       `%rsi')	C source limb pointer, the unshifted operand
    50  define(`vp',       `%rdx')	C source limb pointer, the operand that gets shifted left
    51  define(`n',        `%rcx')	C limb count
    52  
    53  define(`LSH', 2)		C left shift count in bits
    54  define(`RSH', 62)		C 64 - LSH, shr by this leaves the bits shifted out of a limb
    55  define(M, eval(m4_lshift(1,LSH)))	C lea scale factor, presumably 1 << LSH = 4 -- m4_lshift is defined outside this file
    56  
        C The two ifdef blocks pick the carry instruction and the exported names.
        C ADCSBB is the only one of these the code uses directly besides func_n,
        C ADDSUB and func_nc are not referenced by the function in this listing.
    57  ifdef(`OPERATION_addlsh2_n', `
    58    define(ADDSUB,	add)
    59    define(ADCSBB,	adc)
    60    define(func_n,	mpn_addlsh2_n)
    61    define(func_nc,	mpn_addlsh2_nc)')
    62  ifdef(`OPERATION_rsblsh2_n', `
    63    define(ADDSUB,	sub)
    64    define(ADCSBB,	sbb)
    65    define(func_n,	mpn_rsblsh2_n)
    66    define(func_nc,	mpn_rsblsh2_nc)')
    67  
        C ABI_SUPPORT and MULFUNC_PROLOGUE are defined outside this file, presumably
        C they declare the supported ABIs and the entry points this source can be
        C built as -- TODO confirm against the included m4 definitions.
    68  ABI_SUPPORT(DOS64)
    69  ABI_SUPPORT(STD64)
    70  
    71  MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
    72  
    73  ASM_START()
    74  	TEXT
    75  	ALIGN(16)
    76  PROLOGUE(func_n)
        C func_n expands to mpn_addlsh2_n or mpn_rsblsh2_n, depending on which
        C OPERATION_ symbol is defined.
        C   addlsh2:  rp[] = up[] + 4 * vp[]      rsblsh2:  rp[] = 4 * vp[] - up[]
        C Inputs:  rp = destination, up and vp = sources, n = number of 8-byte limbs.
        C Returns in rax the bits shifted out of the top vp limb, plus the final
        C carry for addlsh2 or minus the final borrow for rsblsh2.
        C Every path loads vp[0] before looking at n, and n = 0 would enter at L(b0)
        C and process four limbs, so callers presumably guarantee n >= 1 -- TODO confirm.
        C Register use: r8-r11 hold vp limbs and, after shr, their shifted-out bits.
        C rbx, rbp, r9 and r10 hold finished limbs of the shifted vp until ADCSBB
        C combines them with up.  rax keeps the carry while the loop counter is updated.
        C The loop handles four limbs per pass.  L(b0)-L(b3) preload registers, bias
        C the pointers and jump into the loop at the point matching n mod 4.
        C FUNC_ENTRY and FUNC_EXIT come from the included m4 definitions, presumably
        C the DOS64 argument handling -- TODO confirm.
    77  	FUNC_ENTRY(4)
    78  	push	%rbx
    79  	push	%rbp
    80  
    81  	mov	R32(n), R32(%rax)
    82  	and	$3, R32(%rax)		C eax = n mod 4, selects the entry point below
    83  	jz	L(b0)			C we rely on rax = 0 at target
    84  	cmp	$2, R32(%rax)
    85  	mov	$0, R32(%rax)		C zero rax with mov so the flags from cmp survive
    86  	jz	L(b2)			C n mod 4 = 2
    87  	jg	L(b3)			C n mod 4 = 3, otherwise fall through with n mod 4 = 1
    88  
    89  L(b1):	mov	(vp), %r9
    90  	lea	(%rax,%r9,M), %rbp	C rbp = vp[0] << LSH, rax is 0 here
    91  	shr	$RSH, %r9		C r9 = bits shifted out of vp[0]
    92  	sub	$1, n			C sets ZF for the jz below, the two lea keep the flags
    93  	lea	-8(up), up
    94  	lea	-8(rp), rp		C bias up and rp so the first limb sits at offset 8
    95  	jz	L(cj1)			C n was 1, only the final limb remains and CF is clear
    96  	mov	8(vp), %r10
    97  	lea	(%r9,%r10,M), %r9	C r9 = second limb of the shifted vp
    98  	shr	$RSH, %r10
    99  	mov	16(vp), %r11
   100  	lea	24(vp), vp
   101  	mov	(vp), %r8
   102  	lea	(%r10,%r11,M), %r10	C r10 = third limb of the shifted vp
   103  	shr	$RSH, %r11
   104  	add	R32(%rax), R32(%rax)	C rax is 0, so this clears CF for the first ADCSBB
   105  	jmp	L(L1)
   106  
   107  L(b2):	lea	-32(rp), rp		C L(top) and L(end) add 32 back before the first store
   108  	mov	(vp), %r8
   109  	lea	-32(up), up
   110  	lea	(%rax,%r8,M), %rbx	C rbx = vp[0] << LSH, rax is 0 here
   111  	shr	$RSH, %r8
   112  	mov	8(vp), %r9
   113  	sub	$2, n
   114  	jle	L(end)			C taken when n was 2, no loop pass is needed
   115  	jmp	L(top)
   116  
   117  L(b3):	lea	-24(up), up
   118  	mov	(vp), %r11
   119  	lea	-24(rp), rp		C bias up and rp so the first limb sits at offset 24
   120  	mov	8(vp), %r8
   121  	lea	(%rax,%r11,M), %r10	C r10 = vp[0] << LSH, rax is 0 here
   122  	shr	$RSH, %r11
   123  	lea	8(vp), vp
   124  	lea	(%r11,%r8,M), %rbx	C rbx = second limb of the shifted vp
   125  	add	$1, n			C makes n a multiple of 4 for the countdown, and leaves CF clear unless n wraps
   126  	jmp	L(L3)
   127  
   128  L(b0):	lea	-16(up), up
   129  	mov	(vp), %r10
   130  	lea	(%rax,%r10,M), %r9	C r9 = vp[0] << LSH, rax is 0 from the and above
   131  	shr	$RSH, %r10
   132  	mov	8(vp), %r11
   133  	lea	-16(rp), rp		C bias up and rp so the first limb sits at offset 16
   134  	mov	16(vp), %r8
   135  	lea	(%r10,%r11,M), %r10	C r10 = second limb of the shifted vp
   136  	shr	$RSH, %r11
   137  	add	R32(%rax), R32(%rax)	C rax is 0, so this clears CF for the first ADCSBB
   138  	lea	16(vp), vp
   139  	jmp	L(L0)
   140  
   141  	ALIGN(16)
   142  L(top):	lea	(%r8,%r9,M), %rbp	C each lea forms one shifted limb: bits from the limb below + limb * M
   143  	shr	$RSH, %r9
   144  	lea	32(up), up
   145  	mov	16(vp), %r10
   146  	lea	(%r9,%r10,M), %r9
   147  	shr	$RSH, %r10
   148  	mov	24(vp), %r11
   149  	lea	32(rp), rp
   150  	lea	32(vp), vp
   151  	mov	(vp), %r8
   152  	lea	(%r10,%r11,M), %r10
   153  	shr	$RSH, %r11
   154  	add	R32(%rax), R32(%rax)	C turn the saved 0 or all-ones eax back into CF
   155  	ADCSBB	(up), %rbx
   156  	mov	%rbx, (rp)
   157  L(L1):	ADCSBB	8(up), %rbp
   158  	mov	%rbp, 8(rp)
   159  L(L0):	ADCSBB	16(up), %r9
   160  	lea	(%r11,%r8,M), %rbx	C limb for the next pass or for L(end), lea leaves CF alone
   161  	mov	%r9, 16(rp)
   162  L(L3):	ADCSBB	24(up), %r10
   163  	sbb	R32(%rax), R32(%rax)	C save CF as 0 or all ones, the shr and sub below overwrite the flags
   164  L(L2):	shr	$RSH, %r8		C no jump in this function targets L(L2)
   165  	mov	8(vp), %r9
   166  	mov	%r10, 24(rp)
   167  	sub	$4, n
   168  	jg	L(top)			C loop while more than two limbs are still to be combined
   169  
   170  L(end):	lea	(%r8,%r9,M), %rbp	C wind-down for the last two limbs, no further vp loads
   171  	shr	$RSH, %r9		C r9 = bits shifted out of the top vp limb
   172  	lea	32(up), up
   173  	lea	32(rp), rp
   174  	add	R32(%rax), R32(%rax)	C restore CF, or clear it when arriving from L(b2) with rax = 0
   175  	ADCSBB	(up), %rbx
   176  	mov	%rbx, (rp)
   177  L(cj1):	ADCSBB	8(up), %rbp		C top limb of the result
   178  	mov	%rbp, 8(rp)
   179  
   180  ifdef(`OPERATION_addlsh2_n',`
   181  	mov	R32(n), R32(%rax)	C zero rax: n is 0 on every path to here and mov keeps CF
   182  	adc	%r9, %rax')		C rax = shifted-out bits + final carry
   183  ifdef(`OPERATION_rsblsh2_n',`
   184  	sbb	n, %r9			C subtract 0, so only the final borrow comes off r9
   185  	mov	%r9, %rax')		C rax = shifted-out bits - final borrow
   186  
   187  	pop	%rbp
   188  	pop	%rbx
   189  	FUNC_EXIT()
   190  	ret
   191  EPILOGUE()