github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc32/750/lshift.asm (about)

     1  dnl  PowerPC 750 mpn_lshift -- mpn left shift.
     2  
     3  dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C       cycles/limb
    35  C 750:     3.0
    36  C 7400:    3.0
    37  
    38  
    39  C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
    40  C                       unsigned shift);
    41  C
    42  C This code is the same per-limb speed as mpn/powerpc32/lshift.asm, but
    43  C smaller and saving about 30 or so cycles of overhead.
    44  
     45  ASM_START()
     46  PROLOGUE(mpn_lshift)
     47  	C Shift the size-limb src left by shift bits into dst, top limb first.
     48  	C r3	dst  (r3 is reused for the return value once r5 holds &dst[size])
     49  	C r4	src
     50  	C r5	size; src[size-1] is loaded unconditionally, so size == 0 is not handled
     51  	C r6	shift
     52  	C Returns in r3 the bits shifted out of the top limb src[size-1].
     53  	mtctr	r5		C CTR = size, counts limbs still to be loaded
     54  	slwi	r5, r5, 2	C r5 = 4*size, the byte length of the operands
     55  
     56  	subfic	r7, r6, 32	C r7 = 32-shift
     57  	add	r4, r4, r5	C r4 = &src[size]
     58  
     59  	add	r5, r3, r5	C r5 = &dst[size]; r3 is free from here on
     60  	lwz	r8, -4(r4)	C r8 = src[size-1], the top limb
     61  	bdz	L(one)		C CTR reached zero: size is 1
     62  
     63  	lwzu	r9, -8(r4)	C r9 = src[size-2], r4 = &src[size-2]
     64  
     65  	srw	r3, r8, r7	C return value: bits shifted out of the top limb
     66  	slw	r8, r8, r6	C src[size-1] << shift
     67  	bdz	L(two)		C CTR reached zero: size is 2
     68  
     69  
     70  L(top):
     71  	C r3	return value
     72  	C r4	src, decrementing (points at the limb most recently loaded)
     73  	C r5	dst, decrementing (the next store goes to -4(r5))
     74  	C r6	shift
     75  	C r7	32-shift
     76  	C r8	src[i+1] << shift
     77  	C r9	src[i]
     78  	C r10	(free, loaded with src[i-1] below)
     79  	C Two limbs per pass with r9 and r10 swapping roles; each load runs one limb ahead.
     80  	lwzu	r10, -4(r4)	C src[i-1]
     81  	srw	r11, r9, r7	C bits of src[i] that carry into dst[i+1]
     82  
     83  	or	r8, r8, r11	C (src[i+1] << shift) | (src[i] >> (32-shift))
     84  	stwu	r8, -4(r5)	C store dst[i+1]
     85  
     86  	slw	r8, r9, r6	C src[i] << shift, kept for the next store
     87  	bdz	L(odd)		C all limbs loaded, src[0] is in r10
     88  
     89  	C r8	src[i+1] << shift
     90  	C r9	(free, loaded with src[i-1] below)
     91  	C r10	src[i]
     92  
     93  	lwzu	r9, -4(r4)	C src[i-1]
     94  	srw	r11, r10, r7	C bits of src[i] that carry into dst[i+1]
     95  
     96  	or	r8, r8, r11	C (src[i+1] << shift) | (src[i] >> (32-shift))
     97  	stwu	r8, -4(r5)	C store dst[i+1]
     98  
     99  	slw	r8, r10, r6	C src[i] << shift, kept for the next store
    100  	bdnz	L(top)		C otherwise fall through with src[0] in r9
    101  
    102  	C Tail reached when size is 2 or by falling out of the loop: src[0] is in r9.
    103  L(two):
    104  	C r3	return value
    105  	C r4	(unused)
    106  	C r5	&dst[2]
    107  	C r6	shift
    108  	C r7	32-shift
    109  	C r8	src[1] << shift
    110  	C r9	src[0]
    111  	C r10	(unused)
    112  
    113  	srw	r11, r9, r7	C src[0] >> (32-shift)
    114  	slw	r12, r9, r6	C src[0] << shift
    115  
    116  	or	r8, r8, r11	C (src[1] << shift) | (src[0] >> (32-shift))
    117  	stw	r12, -8(r5)	C dst[0]
    118  
    119  	stw	r8, -4(r5)	C dst[1]
    120  	blr
    121  
    122  	C Tail reached from the middle of the loop: same as L(two) but src[0] is in r10.
    123  L(odd):
    124  	C r3	return value
    125  	C r4	(unused)
    126  	C r5	&dst[2]
    127  	C r6	shift
    128  	C r7	32-shift
    129  	C r8	src[1] << shift
    130  	C r9	(unused)
    131  	C r10	src[0]
    132  
    133  	srw	r11, r10, r7	C src[0] >> (32-shift)
    134  	slw	r12, r10, r6	C src[0] << shift
    135  
    136  	or	r8, r8, r11	C (src[1] << shift) | (src[0] >> (32-shift))
    137  	stw	r12, -8(r5)	C dst[0]
    138  
    139  	stw	r8, -4(r5)	C dst[1]
    140  	blr
    141  
    142  	C Single-limb case: r3 has not been set yet, so the return value is computed here.
    143  L(one):
    144  	C r5	&dst[1]
    145  	C r6	shift
    146  	C r7	32-shift
    147  	C r8	src[0]
    148  
    149  	srw	r3, r8, r7	C return value: bits shifted out of src[0]
    150  	slw	r8, r8, r6	C src[0] << shift
    151  
    152  	stw	r8, -4(r5)	C dst[0]
    153  	blr
    154  
    155  EPILOGUE(mpn_lshift)