github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/arm64/lshift.asm (about)

     1  dnl  ARM64 mpn_lshift.
     2  
     3  dnl  Copyright 2013, 2014 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of the GNU Lesser General Public License as published
     9  dnl  by the Free Software Foundation; either version 3 of the License, or (at
    10  dnl  your option) any later version.
    11  
    12  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    13  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    14  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
    15  dnl  License for more details.
    16  
    17  dnl  You should have received a copy of the GNU Lesser General Public License
    18  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
    19  
    20  include(`../config.m4')
    21  
    22  C	     cycles/limb
    23  C Cortex-A53	 ?
    24  C Cortex-A57	 ?
    25  
    26  changecom(@&*$)
    27  
    28  define(`rp_arg', `x0')		C 1st argument: result pointer as passed in
    29  define(`up',     `x1')		C 2nd argument: source pointer
    30  define(`n',      `x2')		C 3rd argument: limb count, also the loop counter
    31  define(`cnt',    `x3')		C 4th argument: shift count, used as the lsl amount
    32  
    33  define(`rp',     `x16')		C working result pointer; x0 is overwritten with the return value
    34  
    35  define(`tnc',`x8')		C set to 0 - cnt, used as the lsr amount
    36  
    37  ASM_START()
    38  
    39  C mp_limb_t mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned cnt)
    40  C
    41  C Shift the n-limb operand at up left by cnt bits, store the n result limbs at
    42  C rp, and return in x0 the bits shifted out of the most significant limb
    43  C (top limb >> (64 - cnt)).
    44  C
    45  C In:   x0 = rp, x1 = up, x2 = n, x3 = cnt
    46  C Out:  x0 = shifted-out bits
    47  C
    48  C Preconditions (per the GMP manual; not checked here): n >= 1, 1 <= cnt <= 63.
    49  C With cnt = 0, tnc would be 0 too and every lsr would contribute a whole
    50  C neighbouring limb instead of nothing.
    51  C
    52  C Method: both pointers are first moved just past the end of their operands,
    53  C then limbs are handled from most significant to least significant, four per
    54  C loop iteration.  n mod 4 selects one of four entry sequences (b01, b11, b10,
    55  C b00); each deals with the leading limbs and enters the loop at lo2 or lo0
    56  C with n adjusted to a multiple of 4.
    57  C
    58  C Registers:
    59  C   x4,x5 / x6,x7   source limb pairs
    60  C   x10, x11        limb >> (64 - cnt); result limbs once the orr is done
    61  C   x9, x12, x13    limb << cnt, waiting for the high bits of the next lower limb
    62  C   tnc             -cnt; register shifts use the amount mod 64, so lsr by tnc
    63  C                   shifts right by 64 - cnt
    64  C
    65  C x18 is deliberately avoided.  AAPCS64 designates it the platform register and
    66  C Darwin, Windows and Android reserve it, so a value parked there can be lost or
    67  C can corrupt runtime state.  x12 is an otherwise unused caller-saved temporary
    68  C and takes its place; nothing needs to be saved or restored.
    69  
    70  PROLOGUE(mpn_lshift)
    71  	add	rp, rp_arg, n, lsl #3	C rp = just past the last result limb
    72  	add	up, up, n, lsl #3	C up = just past the last source limb
    73  	sub	tnc, xzr, cnt		C tnc = -cnt
    74  	tbz	n, #0, L(bx0)		C bit 0 clear: n is even
    75  
    76  C n odd: fetch the most significant limb on its own.
    77  L(bx1):	ldr	x4, [up,#-8]
    78  	tbnz	n, #1, L(b11)
    79  
    80  C n = 1 mod 4.
    81  L(b01):	lsr	x0, x4, tnc		C return value
    82  	lsl	x12, x4, cnt
    83  	sub	n, n, #1
    84  	cbnz	n, L(gt1)
    85  	str	x12, [rp,#-8]		C n was 1: the only result limb
    86  	ret
    87  L(gt1):	ldp	x4, x5, [up,#-24]	C next two limbs down
    88  	sub	up, up, #8
    89  	add	rp, rp, #16
    90  	b	L(lo2)
    91  
    92  C n = 3 mod 4.
    93  L(b11):	lsr	x0, x4, tnc		C return value
    94  	lsl	x9, x4, cnt
    95  	ldp	x6, x7, [up,#-24]
    96  	add	n, n, #1		C round up so the loop counter is a multiple of 4
    97  	add	up, up, #8
    98  	add	rp, rp, #32
    99  	b	L(lo0)
   100  
   101  C n even: fetch the two most significant limbs.
   102  L(bx0):	ldp	x4, x5, [up,#-16]
   103  	tbz	n, #1, L(b00)
   104  
   105  C n = 2 mod 4.
   106  L(b10):	lsr	x0, x5, tnc		C return value
   107  	lsl	x13, x5, cnt
   108  	lsr	x10, x4, tnc
   109  	lsl	x12, x4, cnt
   110  	sub	n, n, #2
   111  	cbnz	n, L(gt2)
   112  	orr	x10, x10, x13
   113  	stp	x12, x10, [rp,#-16]	C n was 2: both result limbs
   114  	ret
   115  L(gt2):	ldp	x4, x5, [up,#-32]
   116  	orr	x10, x10, x13
   117  	str	x10, [rp,#-8]		C most significant result limb
   118  	sub	up, up, #16
   119  	add	rp, rp, #8
   120  	b	L(lo2)
   121  
   122  C n = 0 mod 4.
   123  L(b00):	lsr	x0, x5, tnc		C return value
   124  	lsl	x13, x5, cnt
   125  	lsr	x10, x4, tnc
   126  	lsl	x9, x4, cnt
   127  	ldp	x6, x7, [up,#-32]
   128  	orr	x10, x10, x13
   129  	str	x10, [rp,#-8]		C most significant result limb
   130  	add	rp, rp, #24
   131  	b	L(lo0)
   132  
   133  C Main loop, four limbs per pass.  lo2 completes and stores two result limbs;
   134  C lo0 starts two more, which top (or end, after the last pass) completes and
   135  C stores.  x12 carries limb << cnt from lo0 (or an entry sequence) into lo2,
   136  C x9 carries it from lo2 (or an entry sequence) into top/end.
   137  	ALIGN(16)
   138  L(top):	ldp	x4, x5, [up,#-48]
   139  	sub	rp, rp, #32		C integrate with stp?
   140  	sub	up, up, #32		C integrate with ldp?
   141  	orr	x11, x11, x9
   142  	orr	x10, x10, x13
   143  	stp	x10, x11, [rp,#-16]
   144  L(lo2):	lsr	x11, x5, tnc
   145  	lsl	x13, x5, cnt
   146  	lsr	x10, x4, tnc
   147  	lsl	x9, x4, cnt
   148  	ldp	x6, x7, [up,#-32]
   149  	orr	x11, x11, x12
   150  	orr	x10, x10, x13
   151  	stp	x10, x11, [rp,#-32]
   152  L(lo0):	sub	n, n, #4
   153  	lsr	x11, x7, tnc
   154  	lsl	x13, x7, cnt
   155  	lsr	x10, x6, tnc
   156  	lsl	x12, x6, cnt
   157  	cbnz	n, L(top)
   158  
   159  C Wind down: store the last three result limbs.  The least significant one is
   160  C just limb << cnt, so zeros fill the vacated low bits.
   161  L(end):	orr	x11, x11, x9
   162  	orr	x10, x10, x13
   163  	stp	x10, x11, [rp,#-48]
   164  	str	x12, [rp,#-56]
   165  	ret
   166  EPILOGUE()