dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt

dnl  Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		    cycles/limb
C POWER3/PPC630		 ?
C POWER4/PPC970		 ?
C POWER5		 2.25
C POWER6		 9.75
C POWER7		 2.15

C TODO
C  * Try to reduce the number of needed live registers
C  * Micro-optimise header code
C  * Keep in synch with lshift.asm and lshiftc.asm

C INPUT PARAMETERS
define(`rp',  `r3')
define(`up',  `r4')
define(`n',   `r5')
define(`cnt', `r6')

define(`tnc',`r0')
define(`u0',`r30')
define(`u1',`r31')
define(`retval',`r5')

C mpn_rshift (rp, up, n, cnt)
C
C Shift the n-limb operand at up right by cnt bits and store the n result
C limbs at rp.  The return value is up[0] shifted left by 64-cnt, i.e. the
C bits shifted out at the low end, placed at the top of a limb.
C
C Preconditions (not checked here, TODO confirm against callers): n >= 1 and
C 1 <= cnt <= 63.  The first limb is loaded unconditionally.
C
C Register usage
C   tnc (r0)		64 - cnt, shift count for the carried-in bits
C   retval (r5)		return value; shares r5 with n, so n is dead once the
C			dispatch values and the loop count have been derived
C   u0, u1 (r30, r31)	most recently loaded source limbs; the incoming r30
C			and r31 are saved below the stack pointer on entry
C			and reloaded at L(ret), r1 itself is never adjusted
C   r8, r9 / r12, r7	right-shifted part and left-shifted part of two
C			result limbs being formed
C   r10, r11		finished result limbs waiting to be stored
C   ctr			(n + 3) >> 2, number of 4-limb groups
C
C Structure
C   The code dispatches on n mod 4 into one of four feed-in sequences
C   (b00, b01, b10, b11).  Each of them primes the pipeline and biases rp so
C   that the shared stores at 0..32(rp) hit the right result limbs, then
C   either jumps into the 4-way unrolled loop at L(top) or, for small n, into
C   the wind-down code at L(end).

ASM_START()
PROLOGUE(mpn_rshift)
	std	r31, -8(r1)
	std	r30, -16(r1)
	subfic	tnc, cnt, 64
C	sldi	r30, n, 3	C byte count corresponding to n
C	add	rp, rp, r30	C rp = rp + n
C	add	up, up, r30	C up = up + n
	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
	cmpdi	cr6, r30, 2
	addi	r31, n, 3	C compute count...
	ld	r10, 0(up)	C load 1st limb for b00...b11
	sld	retval, r10, tnc
ifdef(`HAVE_ABI_mode32',
`	rldicl	r31, r31, 62,34',	C ...branch count
`	srdi	r31, r31, 2')		C ...for ctr
	mtctr	r31		C copy count into ctr
	beq	cr0, L(b00)
	blt	cr6, L(b01)
	ld	r11, 8(up)	C load 2nd limb for b10 and b11
	beq	cr6, L(b10)

C n mod 4 = 3.  Falls through from the dispatch above.
	ALIGN(16)
L(b11):	srd	r8, r10, cnt
	sld	r9, r11, tnc
	ld	u1, 16(up)
	addi	up, up, 24
	srd	r12, r11, cnt
	sld	r7, u1, tnc
	addi	rp, rp, -16
	bdnz	L(gt3)

C n = 3, finish in the wind-down code
	or	r11, r8, r9
	srd	r8, u1, cnt
	b	L(cj3)

	ALIGN(16)
L(gt3):	ld	u0, 0(up)
	or	r11, r8, r9
	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 8(up)
	or	r10, r12, r7
	b	L(L11)

C n mod 4 = 2
	ALIGN(32)
L(b10):	srd	r12, r10, cnt
	addi	rp, rp, -24
	sld	r7, r11, tnc
	bdnz	L(gt2)

C n = 2, finish in the wind-down code
	srd	r8, r11, cnt
	or	r10, r12, r7
	b	L(cj2)

L(gt2):	ld	u0, 16(up)
	srd	r8, r11, cnt
	sld	r9, u0, tnc
	ld	u1, 24(up)
	or	r10, r12, r7
	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 32(up)
	or	r11, r8, r9
	addi	up, up, 16
	b	L(L10)

C n mod 4 = 0
	ALIGN(16)
L(b00):	ld	u1, 8(up)
	srd	r12, r10, cnt
	sld	r7, u1, tnc
	ld	u0, 16(up)
	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 24(up)
	or	r10, r12, r7
	srd	r12, u0, cnt
	sld	r7, u1, tnc
	addi	rp, rp, -8
	bdz	L(cj4)		C n = 4, finish in the wind-down code

L(gt4):	addi	up, up, 32
	ld	u0, 0(up)
	or	r11, r8, r9
	b	L(L00)

C n mod 4 = 1
	ALIGN(16)
L(b01):	bdnz	L(gt1)
C n = 1, the single result limb is stored directly
	srd	r8, r10, cnt
	std	r8, 0(rp)
	b	L(ret)

L(gt1):	ld	u0, 8(up)
	srd	r8, r10, cnt
	sld	r9, u0, tnc
	ld	u1, 16(up)
	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 24(up)
	or	r11, r8, r9
	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 32(up)
	addi	up, up, 40
	or	r10, r12, r7
	bdz	L(end)

C Main loop.  Each iteration loads four source limbs, stores four result
C limbs and advances up and rp by 32 bytes.  L(L00), L(L11) and L(L10) are
C the entry points used by the feed-in sequences above.
	ALIGN(32)
L(top):	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 0(up)
	std	r11, 0(rp)
	or	r11, r8, r9
L(L00):	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 8(up)
	std	r10, 8(rp)
	or	r10, r12, r7
L(L11):	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 16(up)
	std	r11, 16(rp)
	or	r11, r8, r9
L(L10):	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 24(up)
	addi	up, up, 32
	std	r10, 24(rp)
	addi	rp, rp, 32
	or	r10, r12, r7
	bdnz	L(top)

C Wind-down code.  No further loads, the limbs still in flight are combined
C and stored.  The most significant result limb is the plain right shift of
C the last source limb (r8) and goes to 32(rp).
	ALIGN(32)
L(end):	srd	r12, u0, cnt
	sld	r7, u1, tnc
	std	r11, 0(rp)
L(cj4):	or	r11, r8, r9
	srd	r8, u1, cnt
	std	r10, 8(rp)
L(cj3):	or	r10, r12, r7
	std	r11, 16(rp)
L(cj2):	std	r10, 24(rp)
	std	r8, 32(rp)

C Restore the saved registers and return.  With HAVE_ABI_mode32 the 64-bit
C return value is split, upper 32 bits in r3 and the full register copied
C to r4; otherwise it is returned in r3.
L(ret):	ld	r31, -8(r1)
	ld	r30, -16(r1)
ifdef(`HAVE_ABI_mode32',
`	srdi	r3, retval, 32
	mr	r4, retval
',`	mr	r3, retval')
	blr
EPILOGUE()