github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc32/lshiftc.asm (about)

     1  dnl  PowerPC-32 mpn_lshiftc -- shift a limb vector left and store the complemented result.
     2  
     3  dnl  Copyright 1995, 1998, 2000, 2002-2005, 2010 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C                cycles/limb
    34  C 603e:            ?
    35  C 604e:            3.0
    36  C 75x (G3):        3.0
    37  C 7400,7410 (G4):  3.0
    38  C 7445,7455 (G4+): 2.5
    39  C 7447,7457 (G4+): 2.25
    40  C power4/ppc970:   2.5
    41  C power5:          2.5
    42  
    43  C INPUT PARAMETERS
    44  C rp	r3
    45  C up	r4
    46  C n	r5
    47  C cnt	r6
    48  
    49  ASM_START()
    50  PROLOGUE(mpn_lshiftc)
    51  	cmpwi	cr0, r5, 30	C more than 30 limbs?
    52  	slwi	r0, r5, 2	C r0 = 4*n, the operand size in bytes
    53  	add	r4, r4, r0	C make r4 point at end of source (up + 4*n)
    54  	add	r7, r3, r0	C make r7 point at end of result (rp + 4*n)
    55  	bgt	L(BIG)		C branch if more than 30 limbs
    56  
    57  	mtctr	r5		C copy size into CTR
    58  	subfic	r8, r6, 32	C r8 = 32 - cnt, the complementary shift count
    59  	lwzu	r11, -4(r4)	C load most significant source limb
    60  	srw	r3, r11, r8	C return value: bits shifted out at the top, not complemented
    61  	bdz	L(end1)		C n = 1: only the lowest result limb is left
    62  
    63  L(oop):	lwzu	r10, -4(r4)	C fetch the next lower source limb
    64  	slw	r9, r11, r6	C high part: upper limb shifted left by cnt
    65  	srw	r12, r10, r8	C low part: bits moving up from the lower limb
    66  	nor	r9, r9, r12	C merge both parts and complement in one step
    67  	stwu	r9, -4(r7)	C store result limb, walking rp downwards
    68  	bdz	L(end2)		C source exhausted, last limb is in r10
    69  	lwzu	r11, -4(r4)	C second half: same steps with r10 and r11 swapped
    70  	slw	r9, r10, r6
    71  	srw	r12, r11, r8
    72  	nor	r9, r9, r12
    73  	stwu	r9, -4(r7)
    74  	bdnz	L(oop)		C fall through when the last limb is in r11
    75  
    76  L(end1):	C lowest result limb, last source limb in r11
    77  	slw	r0, r11, r6	C nothing is shifted in from below
    78  	nor	r0, r0, r0	C complement
    79  	stw	r0, -4(r7)
    80  	blr
    81  L(end2):	C lowest result limb, last source limb in r10
    82  	slw	r0, r10, r6	C nothing is shifted in from below
    83  	nor	r0, r0, r0	C complement
    84  	stw	r0, -4(r7)
    85  	blr
    86  
    87  L(BIG):
    88  	stwu	r1, -48(r1)	C allocate a 48-byte stack frame
    89  	stmw	r24, 8(r1)	C save r24-r31, registers we are supposed to preserve
    90  	lwzu	r9, -4(r4)	C load most significant source limb
    91  	subfic	r8, r6, 32	C r8 = 32 - cnt, the complementary shift count
    92  	srw	r3, r9, r8	C return value: bits shifted out at the top, not complemented
    93  	slw	r0, r9, r6	C r0 = pending high part of the next result limb
    94  	addi	r5, r5, -1	C r5 = n - 1 source limbs still to load
    95  
    96  	andi.	r10, r5, 3	C (n-1) mod 4 = count for spill loop
    97  	beq	L(e)		C remaining count is already a multiple of 4
    98  	mtctr	r10
    99  	lwzu	r28, -4(r4)	C first spill limb
   100  	bdz	L(xe0)		C only one spill limb: skip the loop
   101  
   102  L(loop0):
   103  	slw	r12, r28, r6	C high part for the result limb after this one
   104  	srw	r24, r28, r8	C low part completing the pending result limb
   105  	lwzu	r28, -4(r4)	C preload the next spill limb
   106  	nor	r24, r0, r24	C merge with pending high part and complement
   107  	stwu	r24, -4(r7)
   108  	mr	r0, r12		C new pending high part
   109  	bdnz	L(loop0)	C taken at most once, since the spill count is at most 3
   110  
   111  L(xe0):	slw	r12, r28, r6	C last spill limb: same steps without a preload
   112  	srw	r24, r28, r8
   113  	nor	r24, r0, r24
   114  	stwu	r24, -4(r7)
   115  	mr	r0, r12
   116  
   117  L(e):	srwi	r5, r5, 2	C count for unrolled loop
   118  	addi	r5, r5, -1	C final group of four is handled after the loop
   119  	mtctr	r5
   120  	lwz	r28, -4(r4)	C preload four source limbs for the first pass
   121  	lwz	r29, -8(r4)
   122  	lwz	r30, -12(r4)
   123  	lwzu	r31, -16(r4)	C last load also steps r4 down by four limbs
   124  
   125  L(loopU):
   126  	slw	r9, r28, r6	C for each of the four limbs: high part,
   127  	srw	r24, r28, r8	C low part,
   128  	lwz	r28, -4(r4)	C then reload the register for the next pass
   129  	slw	r10, r29, r6
   130  	srw	r25, r29, r8
   131  	lwz	r29, -8(r4)
   132  	slw	r11, r30, r6
   133  	srw	r26, r30, r8
   134  	lwz	r30, -12(r4)
   135  	slw	r12, r31, r6
   136  	srw	r27, r31, r8
   137  	lwzu	r31, -16(r4)	C step r4 down by four limbs
   138  	nor	r24, r0, r24	C pending high part from the previous pass
   139  	stw	r24, -4(r7)
   140  	nor	r25, r9, r25	C each low part pairs with the high part of the limb above
   141  	stw	r25, -8(r7)
   142  	nor	r26, r10, r26
   143  	stw	r26, -12(r7)
   144  	nor	r27, r11, r27
   145  	stwu	r27, -16(r7)	C step r7 down by four limbs
   146  	mr	r0, r12		C pending high part for the next pass
   147  	bdnz	L(loopU)
   148  
   149  	slw	r9, r28, r6	C wind-down: the four limbs already loaded, no more loads
   150  	srw	r24, r28, r8
   151  	slw	r10, r29, r6
   152  	srw	r25, r29, r8
   153  	slw	r11, r30, r6
   154  	srw	r26, r30, r8
   155  	slw	r12, r31, r6
   156  	srw	r27, r31, r8
   157  	nor	r24, r0, r24
   158  	stw	r24, -4(r7)
   159  	nor	r25, r9, r25
   160  	stw	r25, -8(r7)
   161  	nor	r26, r10, r26
   162  	stw	r26, -12(r7)
   163  	nor	r27, r11, r27
   164  	stw	r27, -16(r7)
   165  	nor	r12, r12, r12	C lowest result limb: complement only, nothing shifted in
   166  	stw	r12, -20(r7)
   167  	lmw	r24, 8(r1)	C restore registers
   168  	addi	r1, r1, 48	C release the stack frame
   169  	blr
   170  EPILOGUE()