github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/arm64/rshift.asm (about)

     1  dnl  ARM64 mpn_rshift.
     2  
     3  dnl  Copyright 2013, 2014 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of the GNU Lesser General Public License as published
     9  dnl  by the Free Software Foundation; either version 3 of the License, or (at
    10  dnl  your option) any later version.
    11  
    12  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    13  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    14  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
    15  dnl  License for more details.
    16  
    17  dnl  You should have received a copy of the GNU Lesser General Public License
    18  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
    19  
    20  include(`../config.m4')
    21  
    22  C	     cycles/limb
    23  C Cortex-A53	 ?
    24  C Cortex-A57	 ?
    25  
    26  changecom(@&*$)
    27  
    28  define(`rp_arg', `x0')	C result pointer as passed in (first argument register)
    29  define(`up',     `x1')	C source pointer; base of every ldr/ldp
    30  define(`n',      `x2')	C limb count; counted down in the loop
    31  define(`cnt',    `x3')	C shift count, used with lsr
    32  
    33  define(`rp',     `x16')	C working copy of rp_arg, made at function entry
    34  
    35  define(`tnc',`x8')	C set to 0 - cnt at function entry, used with lsl
    36  
    37  C mp_limb_t mpn_rshift (mp_limb_t *rp, const mp_limb_t *up, mp_size_t n,
    38  C                       unsigned int cnt)
    39  C
    40  C Shift {up,n} right by cnt bits and store the n-limb result at rp.  The
    41  C return value (x0) is up[0] << tnc: the bits shifted out at the low end,
    42  C left-justified in a limb.
    43  C
    44  C NOTE(review): n >= 1 and 1 <= cnt <= 63 are presumed (the documented
    45  C mpn_rshift contract); neither is checked here.
    46  C
    47  C Entry dispatches on n mod 4 into a 4-way unrolled loop.  Each entry path
    48  C biases up and rp so that the fixed offsets in the loop hit the right limbs.
    49  C
    50  C Register use:
    51  C   x0        return value; the incoming rp is therefore moved to x16
    52  C   tnc       -cnt, which a register shift takes mod 64, i.e. 64-cnt
    53  C   x4-x7     source limbs
    54  C   x10,x11   limb << tnc, OR-ed with the previous limb >> cnt and stored
    55  C   x9,x13    limb >> cnt, held until OR-ed with the next limb << tnc
    56  C   x12       same role; set in L(b01), L(b10) and L(lo0)
    57  C
    58  C x12 is deliberately used rather than x18.  x18 is the AAPCS64 platform
    59  C register, reserved on e.g. Darwin and Windows, so it cannot serve as a
    60  C scratch register in code meant to be portable.
    61  
    62  ASM_START()
    63  PROLOGUE(mpn_rshift)
    64  	mov	rp, rp_arg		C x0 is needed for the return value
    65  	sub	tnc, xzr, cnt		C tnc = -cnt
    66  	tbz	n, #0, L(bx0)		C bit 0 of n clear: n is even
    67  
    68  L(bx1):	ldr	x4, [up,#0]
    69  	tbnz	n, #1, L(b11)
    70  
    71  C n = 1 (mod 4)
    72  L(b01):	lsl	x0, x4, tnc
    73  	lsr	x12, x4, cnt
    74  	sub	n, n, #1
    75  	cbnz	n, L(gt1)
    76  	str	x12, [rp,#0]		C n was 1: single result limb
    77  	ret
    78  L(gt1):	ldp	x5, x4, [up,#8]
    79  	sub	up, up, #8
    80  	sub	rp, rp, #32
    81  	b	L(lo2)
    82  
    83  C n = 3 (mod 4)
    84  L(b11):	lsl	x0, x4, tnc
    85  	lsr	x9, x4, cnt
    86  	ldp	x7, x6, [up,#8]
    87  	add	n, n, #1		C make n a multiple of 4 for L(lo0)
    88  	sub	up, up, #24
    89  	sub	rp, rp, #48
    90  	b	L(lo0)
    91  
    92  L(bx0):	ldp	x5, x4, [up,#0]
    93  	tbz	n, #1, L(b00)
    94  
    95  C n = 2 (mod 4)
    96  L(b10):	lsl	x0, x5, tnc
    97  	lsr	x13, x5, cnt
    98  	lsl	x10, x4, tnc
    99  	lsr	x12, x4, cnt
   100  	sub	n, n, #2
   101  	cbnz	n, L(gt2)
   102  	orr	x10, x10, x13
   103  	stp	x10, x12, [rp,#0]	C n was 2: both result limbs
   104  	ret
   105  L(gt2):	ldp	x5, x4, [up,#16]
   106  	orr	x10, x10, x13
   107  	str	x10, [rp,#0]
   108  	sub	rp, rp, #24
   109  	b	L(lo2)
   110  
   111  C n = 0 (mod 4)
   112  L(b00):	lsl	x0, x5, tnc
   113  	lsr	x13, x5, cnt
   114  	lsl	x10, x4, tnc
   115  	lsr	x9, x4, cnt
   116  	ldp	x7, x6, [up,#16]
   117  	orr	x10, x10, x13
   118  	str	x10, [rp,#0]
   119  	sub	up, up, #16
   120  	sub	rp, rp, #40
   121  	b	L(lo0)
   122  
   123  C Main loop, four limbs per iteration.  n is a multiple of 4 on every
   124  C arrival at L(lo0), so the cbnz on n reaching zero is an exact exit test.
   125  	ALIGN(16)
   126  L(top):	ldp	x5, x4, [up,#48]
   127  	add	rp, rp, #32		C integrate with stp?
   128  	add	up, up, #32		C integrate with ldp?
   129  	orr	x11, x11, x9
   130  	orr	x10, x10, x13
   131  	stp	x11, x10, [rp,#16]
   132  L(lo2):	lsl	x11, x5, tnc
   133  	lsr	x13, x5, cnt
   134  	lsl	x10, x4, tnc
   135  	lsr	x9, x4, cnt
   136  	ldp	x7, x6, [up,#32]
   137  	orr	x11, x11, x12		C x12 from the entry code or from L(lo0)
   138  	orr	x10, x10, x13
   139  	stp	x11, x10, [rp,#32]
   140  L(lo0):	sub	n, n, #4
   141  	lsl	x11, x7, tnc
   142  	lsr	x13, x7, cnt
   143  	lsl	x10, x6, tnc
   144  	lsr	x12, x6, cnt
   145  	cbnz	n, L(top)
   146  
   147  L(end):	orr	x11, x11, x9
   148  	orr	x10, x10, x13
   149  	stp	x11, x10, [rp,#48]
   150  	str	x12, [rp,#64]		C top limb: no higher source limb to OR in
   151  	ret
   152  EPILOGUE()