github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/lshiftc.asm (about)

     1  dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
     2  
     3  dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C                   cycles/limb
    34  C POWER3/PPC630          ?
    35  C POWER4/PPC970          ?
    36  C POWER5                 2.25
    37  C POWER6                 9.5
    38  C POWER7                 2.15
    39  
    40  C TODO
    41  C  * Try to reduce the number of needed live registers
    42  C  * Micro-optimise header code
    43  C  * Keep in synch with lshift.asm and rshift.asm
    44  C  * Could the long-scheduled std insns be less scheduled?
    45  
    46  C INPUT PARAMETERS
    47  define(`rp',  `r3')	C destination limb pointer, results are stored through it
    48  define(`up',  `r4')	C source limb pointer, operand limbs are loaded through it
    49  define(`n',   `r5')	C limb count, scaled by 8 to get the byte count
    50  define(`cnt', `r6')	C shift count in bits
    51  
    52  define(`tnc',`r0')	C set to 64 - cnt at function entry
    53  define(`u0',`r30')	C u0 and u1 hold source limbs in flight; the function saves and restores r30 and r31
    54  define(`u1',`r31')
    55  define(`retval',`r5')	C return value, shares r5 with n
    56  
    57  ASM_START()
    58  PROLOGUE(mpn_lshiftc)	C store the complement of up[] shifted left by cnt bits into rp[], return the bits shifted out of the top limb
    59  	std	r31, -8(r1)	C save r31 and r30 below r1 without moving r1
    60  	std	r30, -16(r1)
    61  	subfic	tnc, cnt, 64	C tnc = 64 - cnt
    62  	sldi	r7, n, 3	C byte count corresponding to n
    63  	add	up, up, r7	C up = up + n limbs, so limbs are read at negative offsets from the top down
    64  	add	rp, rp, r7	C rp = rp + n limbs
    65  	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
    66  	cmpdi	cr6, r30, 2	C cr6 = comparison of n & 3 with 2
    67  	addi	r31, n, 3	C compute count...
    68  	ld	r10, -8(up)	C load 1st limb for b00...b11
    69  	srd	retval, r10, tnc	C retval = 1st limb >> tnc, not complemented; overwrites n, which was last read two lines up
    70  	srdi	r31, r31, 2	C ...for ctr
    71  	mtctr	r31		C copy count into ctr
    72  	beq	cr0, L(b00)	C n & 3 = 0
    73  	blt	cr6, L(b01)	C n & 3 = 1
    74  	ld	r11, -16(up)	C load 2nd limb for b10 and b11
    75  	beq	cr6, L(b10)	C n & 3 = 2, otherwise fall through with n & 3 = 3
    76  
    77  	ALIGN(16)
    78  L(b11):	sld	r8, r10, cnt	C r8 = 1st limb << cnt
    79  	srd	r9, r11, tnc	C r9 = 2nd limb >> tnc
    80  	ld	u1, -24(up)	C load 3rd limb
    81  	addi	up, up, -24
    82  	sld	r12, r11, cnt
    83  	srd	r7, u1, tnc
    84  	addi	rp, rp, 16	C offset rp so the shared stores at cj3 and in the loop hit the right limbs
    85  	bdnz	L(gt3)	C ctr still nonzero after decrement: more than 3 limbs
    86  
    87  	nor	r11, r8, r9	C r11 = ~(r8 | r9), the top result limb
    88  	sld	r8, u1, cnt
    89  	nor	r8, r8, r8	C r8 = ~r8, the lowest result limb; nothing is merged in from below
    90  	b	L(cj3)
    91  
    92  	ALIGN(16)
    93  L(gt3):	ld	u0, -8(up)
    94  	nor	r11, r8, r9
    95  	sld	r8, u1, cnt
    96  	srd	r9, u0, tnc
    97  	ld	u1, -16(up)
    98  	nor	r10, r12, r7
    99  	b	L(L11)	C join the unrolled loop at its L11 entry
   100  
   101  	ALIGN(32)
   102  L(b10):	sld	r12, r10, cnt	C r12 = 1st limb << cnt
   103  	addi	rp, rp, 24	C offset rp so the shared stores at cj2 and in the loop hit the right limbs
   104  	srd	r7, r11, tnc	C r7 = 2nd limb >> tnc
   105  	bdnz	L(gt2)	C ctr still nonzero after decrement: more than 2 limbs
   106  
   107  	sld	r8, r11, cnt
   108  	nor	r10, r12, r7	C r10 = ~(r12 | r7), the top result limb
   109  	nor	r8, r8, r8	C r8 = ~r8, the lowest result limb
   110  	b	L(cj2)
   111  
   112  L(gt2):	ld	u0, -24(up)
   113  	sld	r8, r11, cnt
   114  	srd	r9, u0, tnc
   115  	ld	u1, -32(up)
   116  	nor	r10, r12, r7
   117  	sld	r12, u0, cnt
   118  	srd	r7, u1, tnc
   119  	ld	u0, -40(up)
   120  	nor	r11, r8, r9
   121  	addi	up, up, -16
   122  	b	L(L10)	C join the unrolled loop at its L10 entry
   123  
   124  	ALIGN(16)
   125  L(b00):	ld	u1, -16(up)	C load 2nd limb
   126  	sld	r12, r10, cnt
   127  	srd	r7, u1, tnc
   128  	ld	u0, -24(up)	C load 3rd limb
   129  	sld	r8, u1, cnt
   130  	srd	r9, u0, tnc
   131  	ld	u1, -32(up)	C load 4th limb
   132  	nor	r10, r12, r7
   133  	sld	r12, u0, cnt
   134  	srd	r7, u1, tnc
   135  	addi	rp, rp, 8	C offset rp so the shared stores at cj4 and in the loop hit the right limbs
   136  	bdz	L(cj4)	C ctr reached zero: exactly 4 limbs, go straight to the wind-down code
   137  
   138  L(gt4):	addi	up, up, -32
   139  	ld	u0, -8(up)
   140  	nor	r11, r8, r9
   141  	b	L(L00)	C join the unrolled loop at its L00 entry
   142  
   143  	ALIGN(16)
   144  L(b01):	bdnz	L(gt1)	C ctr still nonzero after decrement: more than 1 limb
   145  	sld	r8, r10, cnt	C single limb case
   146  	nor	r8, r8, r8	C r8 = ~r8
   147  	std	r8, -8(rp)
   148  	b	L(ret)
   149  
   150  L(gt1):	ld	u0, -16(up)
   151  	sld	r8, r10, cnt
   152  	srd	r9, u0, tnc
   153  	ld	u1, -24(up)
   154  	sld	r12, u0, cnt
   155  	srd	r7, u1, tnc
   156  	ld	u0, -32(up)
   157  	nor	r11, r8, r9
   158  	sld	r8, u1, cnt
   159  	srd	r9, u0, tnc
   160  	ld	u1, -40(up)
   161  	addi	up, up, -40
   162  	nor	r10, r12, r7
   163  	bdz	L(end)	C ctr reached zero: skip the loop and go to the wind-down code
   164  
   165  	ALIGN(32)
   166  L(top):	sld	r12, u0, cnt	C main loop: four result limbs stored per pass, ctr counts the passes
   167  	srd	r7, u1, tnc
   168  	ld	u0, -8(up)
   169  	std	r11, -8(rp)
   170  	nor	r11, r8, r9	C each result limb is ~((upper limb << cnt) | (lower limb >> tnc))
   171  L(L00):	sld	r8, u1, cnt	C loop entry used by the b00 path
   172  	srd	r9, u0, tnc
   173  	ld	u1, -16(up)
   174  	std	r10, -16(rp)
   175  	nor	r10, r12, r7
   176  L(L11):	sld	r12, u0, cnt	C loop entry used by the b11 path
   177  	srd	r7, u1, tnc
   178  	ld	u0, -24(up)
   179  	std	r11, -24(rp)
   180  	nor	r11, r8, r9
   181  L(L10):	sld	r8, u1, cnt	C loop entry used by the b10 path
   182  	srd	r9, u0, tnc
   183  	ld	u1, -32(up)
   184  	addi	up, up, -32	C step up down by four limbs
   185  	std	r10, -32(rp)
   186  	addi	rp, rp, -32	C step rp down by four limbs
   187  	nor	r10, r12, r7
   188  	bdnz	L(top)
   189  
   190  	ALIGN(32)
   191  L(end):	sld	r12, u0, cnt	C wind-down: no more loads, finish and store the limbs still in registers
   192  	srd	r7, u1, tnc
   193  	std	r11, -8(rp)
   194  L(cj4):	nor	r11, r8, r9	C join point for exactly 4 limbs
   195  	sld	r8, u1, cnt
   196  	std	r10, -16(rp)
   197  	nor	r8, r8, r8	C r8 = ~r8, the lowest result limb; nothing is merged in from below
   198  L(cj3):	nor	r10, r12, r7	C join point for exactly 3 limbs
   199  	std	r11, -24(rp)
   200  L(cj2):	std	r10, -32(rp)	C join point for exactly 2 limbs
   201  	std	r8, -40(rp)	C store the lowest result limb
   202  
   203  L(ret):	ld	r31, -8(r1)	C restore the r31 and r30 values saved at entry
   204  	ld	r30, -16(r1)	C next: with HAVE_ABI_mode32 defined, r3 = retval >> 32 and r4 = retval; otherwise r3 = retval
   205  ifdef(`HAVE_ABI_mode32',
   206  `	srdi	r3, retval, 32
   207  	mr	r4, retval
   208  ',`	mr	r3, retval')
   209  	blr
   210  EPILOGUE()