github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/sqr_diag_addlsh1.asm (about)

     1  dnl  AMD64 mpn_sqr_diag_addlsh1
     2  
     3  dnl  Contributed to the GNU project by Torbjörn Granlund.
     4  
     5  dnl  Copyright 2011-2013 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C	     cycles/limb
    36  C AMD K8,K9	 2.5
    37  C AMD K10	 2.5
    38  C AMD bull	 3.6
    39  C AMD pile	 3.6
    40  C AMD steam	 ?
    41  C AMD bobcat	 4
    42  C AMD jaguar	 ?
    43  C Intel P4	 ?
    44  C Intel core	 4
    45  C Intel NHM	 3.6
    46  C Intel SBR	 3.15
    47  C Intel IBR	 3.2
    48  C Intel HWL	 2.6
    49  C Intel BWL	 ?
    50  C Intel atom	14
    51  C VIA nano	 3.5
    52  
    53  C When playing with pointers, set this to $2 to fall back to conservative
    54  C indexing in wind-down code.
        C I is handed two spellings of the same store operand and expands to one of
        C them: $1 is the form without the index register, $2 the form that still
        C uses the n index.  It is used only by the three stores after L(end).
    55  define(`I',`$1')
    56  
        C Incoming arguments; these are the first four integer argument registers
        C of the System V AMD64 calling convention.
    57  define(`rp',     `%rdi')	C destination pointer
    58  define(`tp',     `%rsi')	C source operand that gets doubled
    59  define(`up_arg', `%rdx')	C source operand whose limbs get squared
    60  define(`n',      `%rcx')	C limb count on entry, reused as loop index
    61  
        C Working copy of the up pointer: mul writes its high result half to rdx,
        C so the pointer cannot stay in the register it arrived in.
    62  define(`up',     `%r11')
    63  
    64  ABI_SUPPORT(DOS64)
    65  ABI_SUPPORT(STD64)
    66  
        C mpn_sqr_diag_addlsh1 (rp, tp, up, n)
        C
        C Write N for the value of n on entry and B for 2^64.  The routine stores
        C to rp[0 .. 2N-1] the value
        C
        C     sum over i in [0, N) of up[i]^2 * B^(2i)   +   2 * B * {tp, 2N-2}
        C
        C that is, the squares of the individual limbs of up laid out two limbs
        C apart, plus the (2N-2)-limb operand at tp doubled and offset by one limb.
        C The result is taken modulo B^(2N): a carry out of the top limb is dropped.
        C
        C Reads up[0 .. N-1] and tp[0 .. 2N-3]; writes rp[0 .. 2N-1].
        C The loop body at L(mid) is entered unconditionally, so up[1], tp[0] and
        C tp[1] are always read.  NOTE(review): this appears to require N >= 2 --
        C confirm against the callers.
        C
        C Register use:
        C   rax, rdx  result of mul: low and high half of the current up[i]^2
        C   r8, r9    two consecutive tp limbs, doubled in place
        C   r10       high half of the previous square plus the parked doubling carry
        C   rbx       carry out of the doubling, kept as 0 or 1; callee-saved, hence
        C             the push and pop
        C   up (r11)  up pointer, moved out of rdx because mul overwrites rdx
        C   n  (rcx)  negative even index that is stepped by 2 up to zero
        C
        C Carry discipline: two carries are live per step.  The carry out of the
        C doubling is parked in rbx and folded into r10 on the next step; the carry
        C out of the additions stays in CF and becomes the carry-in of the next
        C doubling.  Both carries have the same weight, so this exchange of roles
        C does not change the sum.  Only mov and lea, which leave the flags alone,
        C sit between the adc that produces CF and the adc that consumes it, so the
        C instruction order in the loop must not be changed.
    67  ASM_START()
    68  	TEXT
    69  	ALIGN(32)
    70  PROLOGUE(mpn_sqr_diag_addlsh1)
    71  	FUNC_ENTRY(4)			C defined in the included m4 files; presumably maps the 4 DOS64 arguments onto the STD64 registers -- confirm
    72  	push	%rbx			C callee-saved; used below for the doubling carry
    73  
    74  	dec	n
    75  	shl	n			C n = 2*(N-1), the number of tp limbs consumed
    76  
    77  	mov	(up_arg), %rax		C rax = up[0], fetched before rdx is clobbered
    78  
    79  	lea	(rp,n,8), rp		C rp -> rp[2N-2]
    80  	lea	(tp,n,8), tp		C tp -> one limb past tp[2N-3]
    81  	lea	(up_arg,n,4), up	C up -> up[N-1]; scale 4 since the index moves 2 per up limb
    82  	neg	n			C index is now -(2N-2) and counts up to 0
    83  
    84  	mul	%rax			C rdx:rax = up[0]^2
    85  	mov	%rax, (rp,n,8)		C rp[0] = low half; the high half stays in rdx
    86  
    87  	xor	R32(%rbx), R32(%rbx)	C no doubling carry yet; xor also clears CF for the first adc (R32 presumably names the 32-bit view)
    88  	jmp	L(mid)
    89  
    90  	ALIGN(16)
    91  L(top):	add	%r10, %r8		C odd limb: doubled tp limb + high half of previous square + parked carry
    92  	adc	%rax, %r9		C even limb: doubled tp limb + low half of current square
    93  	mov	%r8, -8(rp,n,8)
    94  	mov	%r9, (rp,n,8)		C the movs leave CF from the adc above intact
    95  L(mid):	mov	8(up,n,4), %rax		C next limb of up to be squared
    96  	mov	(tp,n,8), %r8
    97  	mov	8(tp,n,8), %r9
    98  	adc	%r8, %r8		C double; CF still holds the pending addition carry and becomes bit 0
    99  	adc	%r9, %r9
   100  	lea	(%rdx,%rbx), %r10	C old high half + old doubling carry, read before both are overwritten; cannot wrap since a square has high half <= 2^64-2
   101  	setc	R8(%rbx)		C park the new doubling carry before mul changes CF (R8 presumably names the 8-bit view)
   102  	mul	%rax			C rdx:rax = square of the limb loaded at L(mid)
   103  	add	$2, n			C advance by one limb pair
   104  	js	L(top)			C continue while the index is still negative
   105  
   106  L(end):	add	%r10, %r8		C same combining step as at L(top), for the last limb pair
   107  	adc	%rax, %r9
   108  	mov	%r8, I(-8(rp),-8(rp,n,8))	C rp[2N-3]; n has reached 0 here when N >= 2, so both spellings agree
   109  	mov	%r9, I((rp),(rp,n,8))		C rp[2N-2]
   110  	adc	%rbx, %rdx		C top limb = last high half + doubling carry + addition carry
   111  	mov	%rdx, I(8(rp),8(rp,n,8))	C rp[2N-1]; a carry out of the adc above is not propagated
   112  
   113  	pop	%rbx
   114  	FUNC_EXIT()
   115  	ret
   116  EPILOGUE()