dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc32/750/rshift.asm

dnl  PowerPC 750 mpn_rshift -- mpn right shift.

dnl  Copyright 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C          cycles/limb
C 750:         3.0
C 7400:        3.0


C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                       unsigned shift);
C
C This code is the same per-limb speed as mpn/powerpc32/rshift.asm, but
C smaller and saving about 30 or so cycles of overhead.
C mpn_rshift: shift the size-limb operand at src right by shift bits, store
C the size result limbs at dst (lowest limb first), and return in r3 the bits
C shifted out of src[0], left-justified, i.e. src[0] << (32-shift).
C
C CTR is loaded with size and decremented by every bdz/bdnz, so after each
C such branch it holds the number of source limbs still to be loaded.
C size = 0 is not handled (the first bdz would wrap CTR); presumably callers
C guarantee size >= 1 -- confirm against the mpn interface.
C
C The main loop is unrolled twice and alternates r9 and r10 as the most
C recently loaded limb.  That is why there are two tail blocks, L(two) and
C L(odd): they differ only in which of those registers holds src[size-1].

ASM_START()
PROLOGUE(mpn_rshift)

	C r3	dst
	C r4	src
	C r5	size
	C r6	shift

	mtctr	r5		C size
	lwz	r8, 0(r4)	C src[0]

	subfic	r7, r6, 32	C 32-shift
	addi	r5, r3, -4	C dst-4, so stores can pre-increment by 4

	slw	r3, r8, r7	C return value (r3 no longer needed as dst)
	bdz	L(one)		C size == 1

	lwzu	r9, 4(r4)	C src[1]
	srw	r8, r8, r6	C src[0] >> shift
	bdz	L(two)		C size == 2


L(top):
	C r3	return value
	C r4	src, incrementing (address of the limb last loaded)
	C r5	dst, incrementing (address one limb below the next store)
	C r6	shift
	C r7	32-shift
	C r8	src[i-1] >> shift
	C r9	src[i]
	C r10	scratch, receives src[i+1]

	lwzu	r10, 4(r4)	C load next limb
	slw	r11, r9, r7	C low bits of src[i], moved to the top

	or	r8, r8, r11	C combine into one result limb
	stwu	r8, 4(r5)

	srw	r8, r9, r6	C carry src[i] >> shift into the next limb
	bdz	L(odd)		C last limb is in r10

	C Second half of the unrolled pair, with r9 and r10 swapped.
	C r8	src[i-1] >> shift
	C r9	scratch, receives the next limb
	C r10	src[i]

	lwzu	r9, 4(r4)
	slw	r11, r10, r7

	or	r8, r8, r11
	stwu	r8, 4(r5)

	srw	r8, r10, r6
	bdnz	L(top)		C falls through with the last limb in r9


L(two):
	C r3	return value
	C r4
	C r5	&dst[size-3], the stores below use offsets 4 and 8
	C r6	shift
	C r7	32-shift
	C r8	src[size-2] >> shift
	C r9	src[size-1]
	C r10

	slw	r11, r9, r7
	srw	r12, r9, r6	C src[size-1] >> shift

	or	r8, r8, r11
	stw	r12, 8(r5)	C dst[size-1]

	stw	r8, 4(r5)	C dst[size-2]
	blr


L(odd):
	C r3	return value
	C r4
	C r5	&dst[size-3], the stores below use offsets 4 and 8
	C r6	shift
	C r7	32-shift
	C r8	src[size-2] >> shift
	C r9
	C r10	src[size-1]

	slw	r11, r10, r7
	srw	r12, r10, r6	C src[size-1] >> shift

	or	r8, r8, r11
	stw	r12, 8(r5)	C dst[size-1]

	stw	r8, 4(r5)	C dst[size-2]
	blr


L(one):
	C r3	return value
	C r4
	C r5	dst-4
	C r6	shift
	C r7
	C r8	src[0]

	srw	r8, r8, r6	C single limb: dst[0] = src[0] >> shift

	stw	r8, 4(r5)	C dst[0]
	blr

EPILOGUE(mpn_rshift)