github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc32/lshift.asm (about)

     1  dnl  PowerPC-32 mpn_lshift -- Shift a number left.
     2  
     3  dnl  Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C                cycles/limb
    34  C 603e:            ?
    35  C 604e:            3.0
    36  C 75x (G3):        3.0
    37  C 7400,7410 (G4):  3.0
    38  C 7445,7455 (G4+): 2.5
    39  C 7447,7457 (G4+): 2.25
    40  C power4/ppc970:   2.5
    41  C power5:          2.5
    42  
    43  C INPUT PARAMETERS
    44  C rp	r3
    45  C up	r4
    46  C n	r5
    47  C cnt	r6
    48  
    49  ASM_START()
    50  PROLOGUE(mpn_lshift)	C shift the n-limb number at up left by cnt bits into rp; r3 returns the bits shifted out of the top limb
    51  	cmpwi	cr0, r5, 30	C more than 30 limbs?
    52  	slwi	r0, r5, 2	C r0 = n * 4, the operand size in bytes
    53  	add	r4, r4, r0	C make r4 point just past the top limb of up
    54  	add	r7, r3, r0	C make r7 point just past the top limb of rp
    55  	bgt	L(BIG)		C branch if more than 30 limbs
    56  C Small operands (n <= 30): simple loop working from the top limb down, two limbs per pass
    57  	mtctr	r5		C copy size into CTR
    58  	subfic	r8, r6, 32	C r8 = 32 - cnt, the complementary right-shift count
    59  	lwzu	r11, -4(r4)	C load the top limb of up, stepping r4 down by one limb
    60  	srw	r3, r11, r8	C compute function return value
    61  	bdz	L(end1)	C n == 1: only the lowest result limb remains
    62  C Each half stores (higher limb << cnt) | (next lower limb >> (32 - cnt)); r10 and r11 swap roles
    63  L(oop):	lwzu	r10, -4(r4)	C fetch the next lower limb
    64  	slw	r9, r11, r6	C high part, from the limb loaded earlier
    65  	srw	r12, r10, r8	C low part, from the limb just loaded
    66  	or	r9, r9, r12
    67  	stwu	r9, -4(r7)	C store result limb, stepping r7 down by one limb
    68  	bdz	L(end2)	C no limbs left: r10 holds the lowest limb
    69  	lwzu	r11, -4(r4)	C second half: same steps with r10 and r11 exchanged
    70  	slw	r9, r10, r6
    71  	srw	r12, r11, r8
    72  	or	r9, r9, r12
    73  	stwu	r9, -4(r7)
    74  	bdnz	L(oop)	C falls through when r11 holds the lowest limb
    75  C The lowest result limb is the lowest source limb shifted left by cnt
    76  L(end1):
    77  	slw	r0, r11, r6
    78  	stw	r0, -4(r7)
    79  	blr
    80  L(end2):
    81  	slw	r0, r10, r6
    82  	stw	r0, -4(r7)
    83  	blr
    84  C Large operands (n > 30): 4-way unrolled loop using r24-r31, which are saved in a stack frame
    85  L(BIG):
    86  	stwu	r1, -48(r1)	C allocate a 48-byte stack frame, storing the old r1 at its base
    87  	stmw	r24, 8(r1)	C save registers we are supposed to preserve
    88  	lwzu	r9, -4(r4)	C load the top limb of up
    89  	subfic	r8, r6, 32	C r8 = 32 - cnt
    90  	srw	r3, r9, r8	C compute function return value
    91  	slw	r0, r9, r6	C r0 = high part waiting for the next lower limb
    92  	addi	r5, r5, -1	C r5 = n - 1 limbs still to be loaded
    93  C Handle (n - 1) mod 4 limbs first so that the remainder is a whole number of 4-limb groups
    94  	andi.	r10, r5, 3	C r10 = (n - 1) mod 4, count for spill loop
    95  	beq	L(e)	C no leftover limbs
    96  	mtctr	r10
    97  	lwzu	r28, -4(r4)
    98  	bdz	L(xe0)	C exactly one leftover limb
    99  
   100  L(loop0):
   101  	slw	r12, r28, r6	C high part for the following result limb
   102  	srw	r24, r28, r8	C low part completing the pending result limb
   103  	lwzu	r28, -4(r4)
   104  	or	r24, r0, r24
   105  	stwu	r24, -4(r7)
   106  	mr	r0, r12	C carry the high part forward in r0
   107  	bdnz	L(loop0)	C taken at most once!
   108  
   109  L(xe0):	slw	r12, r28, r6	C last leftover limb: as loop0 but with no further load
   110  	srw	r24, r28, r8
   111  	or	r24, r0, r24
   112  	stwu	r24, -4(r7)
   113  	mr	r0, r12
   114  
   115  L(e):	srwi	r5, r5, 2	C count for unrolled loop: (n - 1) / 4 groups of four limbs
   116  	addi	r5, r5, -1	C the last group is finished after the loop; CTR is at least 6 since n > 30
   117  	mtctr	r5
   118  	lwz	r28, -4(r4)	C preload the first group of four limbs into r28-r31
   119  	lwz	r29, -8(r4)
   120  	lwz	r30, -12(r4)
   121  	lwzu	r31, -16(r4)	C this load also steps r4 down by 16 bytes
   122  C Per pass: shift the four loaded limbs, reload the next four, then store four result limbs
   123  L(loopU):
   124  	slw	r9, r28, r6
   125  	srw	r24, r28, r8
   126  	lwz	r28, -4(r4)
   127  	slw	r10, r29, r6
   128  	srw	r25, r29, r8
   129  	lwz	r29, -8(r4)
   130  	slw	r11, r30, r6
   131  	srw	r26, r30, r8
   132  	lwz	r30, -12(r4)
   133  	slw	r12, r31, r6
   134  	srw	r27, r31, r8
   135  	lwzu	r31, -16(r4)
   136  	or	r24, r0, r24	C r0 is the high part carried in from the limb above this group
   137  	stw	r24, -4(r7)
   138  	or	r25, r9, r25
   139  	stw	r25, -8(r7)
   140  	or	r26, r10, r26
   141  	stw	r26, -12(r7)
   142  	or	r27, r11, r27
   143  	stwu	r27, -16(r7)	C this store also steps r7 down by 16 bytes
   144  	mr	r0, r12	C carry the high part into the next group
   145  	bdnz	L(loopU)
   146  C Final group: the same shifts and stores as in the loop, with no further loads
   147  	slw	r9, r28, r6
   148  	srw	r24, r28, r8
   149  	slw	r10, r29, r6
   150  	srw	r25, r29, r8
   151  	slw	r11, r30, r6
   152  	srw	r26, r30, r8
   153  	slw	r12, r31, r6
   154  	srw	r27, r31, r8
   155  	or	r24, r0, r24
   156  	stw	r24, -4(r7)
   157  	or	r25, r9, r25
   158  	stw	r25, -8(r7)
   159  	or	r26, r10, r26
   160  	stw	r26, -12(r7)
   161  	or	r27, r11, r27
   162  	stw	r27, -16(r7)
   163  
   164  	stw	r12, -20(r7)	C lowest result limb: lowest source limb << cnt (r7 was not updated above)
   165  	lmw	r24, 8(r1)	C restore registers
   166  	addi	r1, r1, 48	C release the stack frame
   167  	blr
   168  EPILOGUE()