github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/rshift.asm (about)

     1  dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
     2  
     3  dnl  Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C                   cycles/limb
    34  C POWER3/PPC630          ?
    35  C POWER4/PPC970          ?
    36  C POWER5                 2.25
    37  C POWER6                 9.75
    38  C POWER7                 2.15
    39  
    40  C TODO
    41  C  * Try to reduce the number of needed live registers
    42  C  * Micro-optimise header code
    43  C  * Keep in synch with lshift.asm and lshiftc.asm
    44  
    45  C INPUT PARAMETERS
    46  define(`rp',  `r3')	C pointer the result limbs are stored through
    47  define(`up',  `r4')	C pointer the source limbs are loaded from
    48  define(`n',   `r5')	C number of limbs; only n & 3 and (n + 3) / 4 are derived from it
    49  define(`cnt', `r6')	C right shift amount in bits
    50  
    51  define(`tnc',`r0')	C set to 64 - cnt on entry, the complementary left shift amount
    52  define(`u0',`r30')	C source limb in flight; r30 is saved on entry and restored at exit
    53  define(`u1',`r31')	C source limb in flight; r31 is saved on entry and restored at exit
    54  define(`retval',`r5')	C return value; shares r5 with n
    55  
    56  ASM_START()
        C mpn_rshift: store up[] shifted right by cnt bits through rp, one
        C 64-bit limb at a time, and return the bits shifted out of up[0],
        C that is up[0] << (64 - cnt).
        C
        C Each result limb is (up[i] >> cnt) | (up[i+1] << tnc); the last limb
        C stored is just the final source limb >> cnt.
        C
        C The loop at L(top) produces four limbs per iteration, with the loads,
        C shifts and stores of different limbs interleaved.  ctr is set to
        C (n + 3) / 4, and n & 3 selects one of four feed-in paths (b00, b01,
        C b10, b11).  Each path preloads limbs, biases rp and up, and then
        C joins the loop or the wind-down code at the matching point.
        C
        C Register use: r8/r9 and r12/r7 alternate as (right-shifted,
        C left-shifted) pairs, r10 and r11 alternate as assembled result limbs,
        C u0 and u1 hold loaded source limbs.
        C
        C NOTE(review): presumably n >= 1 and cnt is in 1..63; confirm against
        C the documented mpn_rshift contract.
    57  PROLOGUE(mpn_rshift)
    58  	std	r31, -8(r1)	C save r31 (u1) below the stack pointer
    59  	std	r30, -16(r1)	C save r30 (u0) below the stack pointer
    60  	subfic	tnc, cnt, 64	C tnc = 64 - cnt
    61  C	sldi	r30, n, 3	C byte count corresponding to n
    62  C	add	rp, rp, r30	C rp = rp + n
    63  C	add	up, up, r30	C up = up + n
    64  	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
    65  	cmpdi	cr6, r30, 2	C cr6 = result of comparing n & 3 with 2
    66  	addi	r31, n, 3	C compute count...
    67  	ld	r10, 0(up)	C load 1st limb for b00...b11
    68  	sld	retval, r10, tnc	C return value; overwrites n, which is not read again
    69  ifdef(`HAVE_ABI_mode32',
    70  `	rldicl	r31, r31, 62,34',	C ...branch count
    71  `	srdi	r31, r31, 2')	C ...for ctr
    72  	mtctr	r31		C copy count into ctr
    73  	beq	cr0, L(b00)	C n & 3 == 0
    74  	blt	cr6, L(b01)	C n & 3 == 1
    75  	ld	r11, 8(up)	C load 2nd limb for b10 and b11
    76  	beq	cr6, L(b10)	C n & 3 == 2, otherwise fall into b11
    77    
    78  	ALIGN(16)
        C Feed-in for n & 3 == 3.
    79  L(b11):	srd	r8, r10, cnt
    80  	sld	r9, r11, tnc
    81  	ld	u1, 16(up)
    82  	addi	up, up, 24
    83  	srd	r12, r11, cnt
    84  	sld	r7, u1, tnc
    85  	addi	rp, rp, -16	C bias rp: the first store on this path is to 16(rp)
    86  	bdnz	L(gt3)	C taken when more than 3 limbs remain to be handled
    87    
    88  	or	r11, r8, r9	C n == 3: no loop iteration, finish in the wind-down code
    89  	srd	r8, u1, cnt	C last source limb >> cnt
    90  	b	L(cj3)
    91    
    92  	ALIGN(16)
    93  L(gt3):	ld	u0, 0(up)
    94  	or	r11, r8, r9
    95  	srd	r8, u1, cnt
    96  	sld	r9, u0, tnc
    97  	ld	u1, 8(up)
    98  	or	r10, r12, r7
    99  	b	L(L11)	C join the loop at its third quarter
   100    
   101  	ALIGN(32)
        C Feed-in for n & 3 == 2.
   102  L(b10):	srd	r12, r10, cnt
   103  	addi	rp, rp, -24	C bias rp: the first store on this path is to 24(rp)
   104  	sld	r7, r11, tnc
   105  	bdnz	L(gt2)	C taken when more than 2 limbs remain to be handled
   106    
   107  	srd	r8, r11, cnt	C n == 2: last source limb >> cnt
   108  	or	r10, r12, r7
   109  	b	L(cj2)
   110    
   111  L(gt2):	ld	u0, 16(up)
   112  	srd	r8, r11, cnt
   113  	sld	r9, u0, tnc
   114  	ld	u1, 24(up)
   115  	or	r10, r12, r7
   116  	srd	r12, u0, cnt
   117  	sld	r7, u1, tnc
   118  	ld	u0, 32(up)
   119  	or	r11, r8, r9
   120  	addi	up, up, 16
   121  	b	L(L10)	C join the loop at its fourth quarter
   122    
   123  	ALIGN(16)
        C Feed-in for n & 3 == 0.
   124  L(b00):	ld	u1, 8(up)
   125  	srd	r12, r10, cnt
   126  	sld	r7, u1, tnc
   127  	ld	u0, 16(up)
   128  	srd	r8, u1, cnt
   129  	sld	r9, u0, tnc
   130  	ld	u1, 24(up)
   131  	or	r10, r12, r7
   132  	srd	r12, u0, cnt
   133  	sld	r7, u1, tnc
   134  	addi	rp, rp, -8	C bias rp: the first store on this path is to 8(rp)
   135  	bdz	L(cj4)	C n == 4: no loop iteration, finish in the wind-down code
   136    
   137  L(gt4):	addi	up, up, 32
   138  	ld	u0, 0(up)
   139  	or	r11, r8, r9
   140  	b	L(L00)	C join the loop at its second quarter
   141    
   142  	ALIGN(16)
        C Feed-in for n & 3 == 1.
   143  L(b01):	bdnz	L(gt1)	C taken when more than 1 limb remains to be handled
   144  	srd	r8, r10, cnt	C n == 1: the only result limb is up[0] >> cnt
   145  	std	r8, 0(rp)
   146  	b	L(ret)
   147    
   148  L(gt1):	ld	u0, 8(up)
   149  	srd	r8, r10, cnt
   150  	sld	r9, u0, tnc
   151  	ld	u1, 16(up)
   152  	srd	r12, u0, cnt
   153  	sld	r7, u1, tnc
   154  	ld	u0, 24(up)
   155  	or	r11, r8, r9
   156  	srd	r8, u1, cnt
   157  	sld	r9, u0, tnc
   158  	ld	u1, 32(up)
   159  	addi	up, up, 40
   160  	or	r10, r12, r7
   161  	bdz	L(end)	C n == 5: skip the loop, otherwise fall into L(top)
   162    
   163  	ALIGN(32)
        C Main loop: four result limbs stored per iteration; up and rp each
        C advance by 32 bytes.  L(L00), L(L11) and L(L10) are the mid-loop
        C entry points used by the feed-in paths above.
   164  L(top):	srd	r12, u0, cnt
   165  	sld	r7, u1, tnc
   166  	ld	u0, 0(up)
   167  	std	r11, 0(rp)
   168  	or	r11, r8, r9
   169  L(L00):	srd	r8, u1, cnt
   170  	sld	r9, u0, tnc
   171  	ld	u1, 8(up)
   172  	std	r10, 8(rp)
   173  	or	r10, r12, r7
   174  L(L11):	srd	r12, u0, cnt
   175  	sld	r7, u1, tnc
   176  	ld	u0, 16(up)
   177  	std	r11, 16(rp)
   178  	or	r11, r8, r9
   179  L(L10):	srd	r8, u1, cnt
   180  	sld	r9, u0, tnc
   181  	ld	u1, 24(up)
   182  	addi	up, up, 32
   183  	std	r10, 24(rp)
   184  	addi	rp, rp, 32
   185  	or	r10, r12, r7
   186  	bdnz	L(top)
   187    
   188  	ALIGN(32)
        C Wind-down: no further loads, only the limbs already in registers are
        C combined and stored.  L(cj4), L(cj3) and L(cj2) are the entry points
        C for operands too short to enter the loop.
   189  L(end):	srd	r12, u0, cnt
   190  	sld	r7, u1, tnc
   191  	std	r11, 0(rp)
   192  L(cj4):	or	r11, r8, r9
   193  	srd	r8, u1, cnt	C last source limb >> cnt
   194  	std	r10, 8(rp)
   195  L(cj3):	or	r10, r12, r7
   196  	std	r11, 16(rp)
   197  L(cj2):	std	r10, 24(rp)
   198  	std	r8, 32(rp)	C last result limb
   199    
   200  L(ret):	ld	r31, -8(r1)	C restore r31 saved on entry
   201  	ld	r30, -16(r1)	C restore r30 saved on entry
        C Return retval: with HAVE_ABI_mode32 defined r3 gets retval >> 32 and
        C r4 gets retval, otherwise r3 gets retval.
   202  ifdef(`HAVE_ABI_mode32',
   203  `	srdi	r3, retval, 32
   204  	mr	r4, retval
   205  ',`	mr	r3, retval')
   206  	blr
   207  EPILOGUE()