github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc32/rshift.asm (about)

     1  dnl  PowerPC-32 mpn_rshift -- Shift a number right.
     2  
     3  dnl  Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C                cycles/limb
    34  C 603e:            ?
    35  C 604e:            3.0
    36  C 75x (G3):        3.0
    37  C 7400,7410 (G4):  3.0
    38  C 7445,7455 (G4+): 2.5
    39  C 7447,7457 (G4+): 2.25
    40  C power4/ppc970:   2.5
    41  C power5:          2.5
    42  
    43  C INPUT PARAMETERS
    44  C rp	r3
    45  C up	r4
    46  C n	r5
    47  C cnt	r6
    48  
    49  ASM_START()
        C mpn_rshift: shift the n-limb number at up right by cnt bits and store
        C the n result limbs at rp.  Limbs are 32-bit words and are processed
        C from the lowest address upwards.  Result limb i is
        C	(up[i] >> cnt) | (up[i+1] << (32-cnt))
        C and the top result limb is just up[n-1] >> cnt.  The value returned
        C in r3 is up[0] << (32-cnt).
        C
        C Operands of at most 30 limbs use a simple loop unrolled twice.  Larger
        C operands take the L(BIG) path, a loop unrolled 4 times that uses
        C r24-r31 and therefore needs a stack frame.
        C
        C NOTE(review): n >= 1 looks assumed (up[0] is loaded unconditionally
        C and CTR is loaded from n without a zero check), and cnt is presumably
        C in the range 1..31 -- neither is checked here, confirm against callers.
    50  PROLOGUE(mpn_rshift)
    51  	cmpwi	cr0, r5, 30	C more than 30 limbs?
    52  	addi	r7, r3, -4	C r7 = rp-4, so the first stwu at 4(r7) writes rp[0]
    53  	bgt	L(BIG)		C yes: use the 4-way unrolled code
    54  
    55  	mtctr	r5		C copy size into CTR
    56  	subfic	r8, r6, 32	C r8 = 32-cnt, the complementary left shift count
    57  	lwz	r11, 0(r4)	C r11 = up[0]
    58  	slw	r3, r11, r8	C compute function return value
    59  	bdz	L(end1)		C CTR = n-1; zero means up[0] was the only limb
    60  
        C Loop unrolled twice.  r11 and r10 alternate as the current limb: each
        C half loads the next limb, combines it with the current one and stores
        C one result limb.  CTR counts the source limbs still to be loaded.
    61  L(oop):	lwzu	r10, 4(r4)	C r10 = next limb, up pointer advanced
    62  	srw	r9, r11, r6	C low part: current limb >> cnt
    63  	slw	r12, r10, r8	C high part: next limb << (32-cnt)
    64  	or	r9, r9, r12
    65  	stwu	r9, 4(r7)	C store result limb, rp pointer advanced
    66  	bdz	L(end2)		C nothing left to load; last limb is in r10
    67  	lwzu	r11, 4(r4)	C second half: same step with r10 and r11 swapped
    68  	srw	r9, r10, r6
    69  	slw	r12, r11, r8
    70  	or	r9, r9, r12
    71  	stwu	r9, 4(r7)
    72  	bdnz	L(oop)		C falls through with the last limb in r11
    73  
        C Exit with the last source limb in r11: store the top result limb.
    74  L(end1):
    75  	srw	r0, r11, r6	C top result limb = up[n-1] >> cnt
    76  	stw	r0, 4(r7)
    77  	blr
        C Exit with the last source limb in r10: store the top result limb.
    78  L(end2):
    79  	srw	r0, r10, r6	C top result limb = up[n-1] >> cnt
    80  	stw	r0, 4(r7)
    81  	blr
    82  
        C Large operand path (n > 30).  Throughout this path r0 carries the
        C right-shifted part of the previous limb from one step to the next.
    83  L(BIG):
    84  	stwu	r1, -48(r1)	C allocate a 48-byte stack frame
    85  	stmw	r24, 8(r1)	C save registers we are supposed to preserve
    86  	lwz	r9, 0(r4)	C r9 = up[0]
    87  	subfic	r8, r6, 32	C r8 = 32-cnt, the complementary left shift count
    88  	slw	r3, r9, r8	C compute function return value
    89  	srw	r0, r9, r6	C r0 = up[0] >> cnt, low part of rp[0]
    90  	addi	r5, r5, -1	C r5 = n-1 source limbs still to be loaded
    91  
    92  	andi.	r10, r5, 3	C (n-1) mod 4 limbs are handled one at a time first
    93  	beq	L(e)		C none: go straight to the unrolled code
    94  	mtctr	r10
    95  	lwzu	r28, 4(r4)	C r28 = first of those limbs
    96  	bdz	L(xe0)		C only one such limb: skip the loop
    97  
        C One limb per pass: finish the pending result limb from r0 and r28,
        C then load the next source limb into r28.
    98  L(loop0):
    99  	srw	r12, r28, r6	C r12 = low part of the following result limb
   100  	slw	r24, r28, r8	C r24 = high part of the pending result limb
   101  	lwzu	r28, 4(r4)	C load the next limb
   102  	or	r24, r0, r24
   103  	stwu	r24, 4(r7)	C store result limb, rp pointer advanced
   104  	mr	r0, r12		C carry the low part forward
   105  	bdnz	L(loop0)	C taken at most once!
   106  
        C Same step for the last single limb, without a further load.
   107  L(xe0):	srw	r12, r28, r6
   108  	slw	r24, r28, r8
   109  	or	r24, r0, r24
   110  	stwu	r24, 4(r7)
   111  	mr	r0, r12
   112  
   113  L(e):	srwi	r5, r5, 2	C count for unrolled loop: (n-1)/4 groups of 4 limbs
   114  	addi	r5, r5, -1	C the last group is finished after the loop
   115  	mtctr	r5		C n > 30 here, so this count is at least 6
   116  	lwz	r28, 4(r4)	C preload the first group of 4 limbs into r28-r31
   117  	lwz	r29, 8(r4)
   118  	lwz	r30, 12(r4)
   119  	lwzu	r31, 16(r4)	C up pointer advanced by 16 bytes
   120  
        C Each pass shifts the 4 limbs held in r28-r31, reloads r28-r31 with
        C the next 4 limbs and stores 4 result limbs.  r9-r12 hold the
        C right-shifted parts and r24-r27 the left-shifted parts.
   121  L(loopU):
   122  	srw	r9, r28, r6
   123  	slw	r24, r28, r8
   124  	lwz	r28, 4(r4)
   125  	srw	r10, r29, r6
   126  	slw	r25, r29, r8
   127  	lwz	r29, 8(r4)
   128  	srw	r11, r30, r6
   129  	slw	r26, r30, r8
   130  	lwz	r30, 12(r4)
   131  	srw	r12, r31, r6
   132  	slw	r27, r31, r8
   133  	lwzu	r31, 16(r4)	C up pointer advanced by 16 bytes
   134  	or	r24, r0, r24	C r0 is the low part left over from the previous limb
   135  	stw	r24, 4(r7)
   136  	or	r25, r9, r25
   137  	stw	r25, 8(r7)
   138  	or	r26, r10, r26
   139  	stw	r26, 12(r7)
   140  	or	r27, r11, r27
   141  	stwu	r27, 16(r7)	C rp pointer advanced by 16 bytes
   142  	mr	r0, r12		C carry the low part into the next pass
   143  	bdnz	L(loopU)
   144  
        C Final group: same computation as in the loop, with nothing left to load.
   145  	srw	r9, r28, r6
   146  	slw	r24, r28, r8
   147  	srw	r10, r29, r6
   148  	slw	r25, r29, r8
   149  	srw	r11, r30, r6
   150  	slw	r26, r30, r8
   151  	srw	r12, r31, r6
   152  	slw	r27, r31, r8
   153  	or	r24, r0, r24
   154  	stw	r24, 4(r7)
   155  	or	r25, r9, r25
   156  	stw	r25, 8(r7)
   157  	or	r26, r10, r26
   158  	stw	r26, 12(r7)
   159  	or	r27, r11, r27
   160  	stw	r27, 16(r7)
   161  
   162  	stw	r12, 20(r7)	C top result limb = up[n-1] >> cnt
   163  	lmw	r24, 8(r1)	C restore registers
   164  	addi	r1, r1, 48	C release the stack frame
   165  	blr
   166  EPILOGUE()