github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/atom/rshift.asm (about)

     1  dnl  AMD64 mpn_rshift -- mpn right shift, optimised for Atom.
     2  
     3  dnl  Contributed to the GNU project by Torbjorn Granlund.
     4  
     5  dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C	     cycles/limb
    36  C AMD K8,K9	 ?
    37  C AMD K10	 ?
    38  C Intel P4	 ?
    39  C Intel core2	 ?
    40  C Intel NHM	 ?
    41  C Intel SBR	 ?
    42  C Intel atom	 4.5
    43  C VIA nano	 ?
    44  
    45  C TODO
    46  C  * Consider using 4-way unrolling.  We reach 4 c/l, but the code is 2.5 times
    47  C    larger.
    48  
    49  C INPUT PARAMETERS
    50  define(`rp',	`%rdi')	C destination pointer; result limbs are stored through it
    51  define(`up',	`%rsi')	C source pointer; operand limbs are loaded through it
    52  define(`n',	`%rdx')	C limb count; halved on entry and then used as the loop counter
    53  define(`cnt',	`%rcx')	C shift count; always accessed as R8(cnt), presumably %cl, the x86 variable-shift count register
    54  
    55  ABI_SUPPORT(DOS64)	C macro defined outside this file; presumably declares support for the Windows x64 ABI -- TODO confirm
    56  ABI_SUPPORT(STD64)	C likewise, presumably for the System V AMD64 ABI, whose argument registers match the defines above
    57  
    58  ASM_START()
    59  	TEXT
    60  	ALIGN(16)	C align the function entry point (16 is presumably a byte count)
    61  PROLOGUE(mpn_rshift)
        C  Shift the n-limb operand at up right by cnt bits and store the n-limb
        C  result at rp.  Result limb i is (up[i] >> cnt) | (up[i+1] << (64-cnt));
        C  the top result limb is just up[n-1] >> cnt.  The value left in %rax is
        C  up[0] << (64-cnt), i.e. the bits shifted out at the low end.
        C
        C  The low byte of cnt is flipped between cnt and -cnt with neg.  x86 masks
        C  the count of a 64-bit shift to 6 bits, so a count of -cnt shifts by 64-cnt.
        C
        C  Limbs are processed two per loop iteration.  n is halved up front and the
        C  bit shifted out selects the odd or the even entry sequence.  There is no
        C  check for n = 0 or cnt = 0; assumes n >= 1 and 1 <= cnt <= 63 -- TODO
        C  confirm against the documented mpn_rshift contract.
    62  	FUNC_ENTRY(4)	C macro defined outside this file; presumably maps the 4 arguments to the registers above on DOS64 -- TODO confirm
    63  	shr	R32(n)	C n = n / 2; CF = low bit of the original n (set when n is odd)
    64  	mov	(up), %rax	C rax = up[0]; mov leaves CF intact for the jnc below
    65  	jnc	L(evn)	C even n: use the even entry sequence
    66  
    67  	mov	%rax, %r11	C odd n: r11 = up[0]
    68  	shr	R8(cnt), %r11	C r11 = up[0] >> cnt, the high-shifted part of result limb 0
    69  	neg	R8(cnt)	C count = -cnt, so the shl instructions below shift by 64-cnt
    70  	shl	R8(cnt), %rax	C rax = up[0] << (64-cnt); the return value, not written again in this routine
    71  	test	n, n	C the halved n is zero only when the original n was 1
    72  	jnz	L(gt1)
    73  	mov	%r11, (rp)	C n = 1: rp[0] = up[0] >> cnt
    74  	FUNC_EXIT()
    75  	ret
    76  
    77  L(gt1):	mov	8(up), %r8	C r8 = up[1]
    78  	mov	%r8, %r10	C r10 = unshifted copy of up[1], right-shifted later for the next result limb
    79  	shl	R8(cnt), %r8	C r8 = up[1] << (64-cnt); the count is still -cnt here
    80  	jmp	L(lo1)	C enter the loop at its midpoint
    81  
    82  L(evn):	mov	%rax, %r10	C even n: r10 = up[0]
    83  	neg	R8(cnt)	C count = -cnt
    84  	shl	R8(cnt), %rax	C rax = up[0] << (64-cnt); the return value, not written again in this routine
    85  	mov	8(up), %r9	C r9 = up[1]
    86  	mov	%r9, %r11	C r11 = unshifted copy of up[1]
    87  	shl	R8(cnt), %r9	C r9 = up[1] << (64-cnt)
    88  	neg	R8(cnt)	C count = cnt again, as L(top) and L(end) expect
    89  	dec	n	C the first limb pair is already loaded; ZF is set when the original n was 2
    90  	lea	-8(rp), rp	C bias rp so that 8(rp) addresses rp[0]; lea does not modify flags
    91  	lea	8(up), up	C bias up so that 8(up) addresses up[2]; flags still come from the dec
    92  	jz	L(end)	C tests ZF from the dec above
    93  
    94  	ALIGN(8)
        C  State on arrival at L(top) and at L(end): count = cnt, r10 = a source limb,
        C  r11 = the following source limb, r9 = that following limb << (64-cnt).
    95  L(top):	shr	R8(cnt), %r10	C r10 = lower source limb >> cnt
    96  	or	%r10, %r9	C r9 = finished result limb
    97  	shr	R8(cnt), %r11	C r11 = upper source limb >> cnt, half of the next result limb
    98  	neg	R8(cnt)	C count = -cnt
    99  	mov	8(up), %r8	C load the next source limb
   100  	mov	%r8, %r10	C keep an unshifted copy for the right shift at the next L(top) or L(end)
   101  	mov	%r9, 8(rp)	C store the finished result limb
   102  	shl	R8(cnt), %r8	C r8 = next source limb << (64-cnt)
   103  	lea	16(rp), rp	C advance rp by two limbs
   104  L(lo1):	mov	16(up), %r9	C load the source limb after that one
   105  	or	%r11, %r8	C r8 = finished result limb
   106  	mov	%r9, %r11	C keep an unshifted copy
   107  	shl	R8(cnt), %r9	C r9 = that limb << (64-cnt)
   108  	lea	16(up), up	C advance up by two limbs
   109  	neg	R8(cnt)	C count = cnt
   110  	mov	%r8, (rp)	C store the second result limb of this pair
   111  	dec	n	C one more limb pair consumed
   112  	jg	L(top)	C loop while pairs remain
   113  
   114  L(end):	shr	R8(cnt), %r10	C r10 = second-to-last source limb >> cnt
   115  	or	%r10, %r9	C r9 = second-to-last result limb
   116  	shr	R8(cnt), %r11	C r11 = last source limb >> cnt, the top result limb
   117  	mov	%r9, 8(rp)	C store the second-to-last result limb
   118  	mov	%r11, 16(rp)	C store the top result limb
   119  	FUNC_EXIT()
   120  	ret
   121  EPILOGUE()