dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt

dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		    cycles/limb
C POWER3/PPC630		 ?
C POWER4/PPC970		 ?
C POWER5		 2.25
C POWER6		 9.5
C POWER7		 2.15

C TODO
C  * Try to reduce the number of needed live registers
C  * Micro-optimise header code
C  * Keep in synch with lshift.asm and rshift.asm
C  * Could the long-scheduled std insns be less scheduled?

C OVERVIEW
C  Shift the n-limb operand at up left by cnt bits, complement the result and
C  store it at rp.  With tnc = 64 - cnt, every stored limb except the lowest is
C	rp[i] = ~((up[i] << cnt) | (up[i-1] >> tnc))
C  and the lowest one is ~(up[0] << cnt), so its low cnt bits come out as ones.
C  The return value is up[n-1] >> tnc, i.e. the bits shifted out of the top
C  limb.  The return value is NOT complemented.
C
C  Both pointers are first advanced past the end of their operands and all
C  accesses then use negative offsets, so limbs are read and written from the
C  most significant end downwards.
C
C  The main loop handles four limbs per pass.  ctr is set to (n + 3) / 4 and
C  the code is entered at b00, b01, b10 or b11 according to n mod 4.
C
C  The top limb is loaded unconditionally, so n = 0 is not handled.
C  NOTE(review): cnt is presumably limited to 1..63 as for the other mpn shift
C  routines -- confirm against the callers.
C
C REGISTER USE
C  r7..r12   shift halves and finished limbs:
C	       r8  / r9    limb << cnt  /  next lower limb >> tnc
C	       r12 / r7    same, for the alternate limb pair
C	       r10, r11    combined and complemented limbs waiting to be stored
C  r30, r31  u0/u1, the two most recently loaded source limbs.  They are
C	     saved below the stack pointer on entry, without moving r1, and
C	     reloaded at L(ret).

C INPUT PARAMETERS
define(`rp',  `r3')
define(`up',  `r4')
define(`n',   `r5')
define(`cnt', `r6')

define(`tnc',`r0')
define(`u0',`r30')
define(`u1',`r31')
define(`retval',`r5')

ASM_START()
PROLOGUE(mpn_lshiftc)
	std	r31, -8(r1)
	std	r30, -16(r1)
	subfic	tnc, cnt, 64
	sldi	r7, n, 3	C byte count corresponding to n
	add	up, up, r7	C up = up + n
	add	rp, rp, r7	C rp = rp + n
C r30 and r31 serve as scratch here, before they take on their u0/u1 roles.
	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
	cmpdi	cr6, r30, 2
	addi	r31, n, 3	C compute count...
	ld	r10, -8(up)	C load 1st limb for b00...b11
C retval shares r5 with n.  n is not read again after this point.
	srd	retval, r10, tnc
	srdi	r31, r31, 2	C ...for ctr
	mtctr	r31		C copy count into ctr
	beq	cr0, L(b00)
	blt	cr6, L(b01)
	ld	r11, -16(up)	C load 2nd limb for b10 and b11
	beq	cr6, L(b10)

C n mod 4 = 3.  The two top limbs are in r10 and r11.
	ALIGN(16)
L(b11):	sld	r8, r10, cnt
	srd	r9, r11, tnc
	ld	u1, -24(up)
	addi	up, up, -24
	sld	r12, r11, cnt
	srd	r7, u1, tnc
	addi	rp, rp, 16
	bdnz	L(gt3)

C n = 3: finish the top limb and the complemented lowest limb, then join the
C common tail.
	nor	r11, r8, r9
	sld	r8, u1, cnt
	nor	r8, r8, r8
	b	L(cj3)

C n mod 4 = 3 and n > 3: prime the pipeline and enter the loop at L11.
	ALIGN(16)
L(gt3):	ld	u0, -8(up)
	nor	r11, r8, r9
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -16(up)
	nor	r10, r12, r7
	b	L(L11)

C n mod 4 = 2.  The two top limbs are in r10 and r11.
	ALIGN(32)
L(b10):	sld	r12, r10, cnt
	addi	rp, rp, 24
	srd	r7, r11, tnc
	bdnz	L(gt2)

C n = 2: both result limbs are formed here and stored by the common tail.
	sld	r8, r11, cnt
	nor	r10, r12, r7
	nor	r8, r8, r8
	b	L(cj2)

C n mod 4 = 2 and n > 2: prime the pipeline and enter the loop at L10.
L(gt2):	ld	u0, -24(up)
	sld	r8, r11, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	nor	r10, r12, r7
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -40(up)
	nor	r11, r8, r9
	addi	up, up, -16
	b	L(L10)

C n mod 4 = 0.  Only the top limb (r10) has been loaded so far.
	ALIGN(16)
L(b00):	ld	u1, -16(up)
	sld	r12, r10, cnt
	srd	r7, u1, tnc
	ld	u0, -24(up)
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	nor	r10, r12, r7
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	addi	rp, rp, 8
	bdz	L(cj4)		C n = 4: go straight to the tail

C n mod 4 = 0 and n > 4: enter the loop at L00.
L(gt4):	addi	up, up, -32
	ld	u0, -8(up)
	nor	r11, r8, r9
	b	L(L00)

C n mod 4 = 1.  Only the top limb (r10) has been loaded so far.
	ALIGN(16)
L(b01):	bdnz	L(gt1)
C n = 1: the single result limb is ~(up[0] << cnt).
	sld	r8, r10, cnt
	nor	r8, r8, r8
	std	r8, -8(rp)
	b	L(ret)

C n mod 4 = 1 and n > 1: load four more limbs, then either fall into the loop
C or, when n = 5, skip it.
L(gt1):	ld	u0, -16(up)
	sld	r8, r10, cnt
	srd	r9, u0, tnc
	ld	u1, -24(up)
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -32(up)
	nor	r11, r8, r9
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -40(up)
	addi	up, up, -40
	nor	r10, r12, r7
	bdz	L(end)

C Main loop: four limbs per pass.  Each group of five instructions shifts one
C limb pair, loads the next source limb, stores a finished limb and combines
C the halves produced by the previous group.
	ALIGN(32)
L(top):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -8(up)
	std	r11, -8(rp)
	nor	r11, r8, r9
L(L00):	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -16(up)
	std	r10, -16(rp)
	nor	r10, r12, r7
L(L11):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -24(up)
	std	r11, -24(rp)
	nor	r11, r8, r9
L(L10):	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	addi	up, up, -32
	std	r10, -32(rp)
	addi	rp, rp, -32
	nor	r10, r12, r7
	bdnz	L(top)

C Tail: no more loads.  Drain the limbs still in flight.  cj4, cj3 and cj2 are
C the join points for the n = 4, n = 3 and n = 2 short cases above.  The last
C limb stored is ~(u1 << cnt), which has no lower neighbour to merge in.
	ALIGN(32)
L(end):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	std	r11, -8(rp)
L(cj4):	nor	r11, r8, r9
	sld	r8, u1, cnt
	std	r10, -16(rp)
	nor	r8, r8, r8
L(cj3):	nor	r10, r12, r7
	std	r11, -24(rp)
L(cj2):	std	r10, -32(rp)
	std	r8, -40(rp)

C Reload r30/r31 and hand back retval.  When HAVE_ABI_mode32 is defined the
C value is returned with retval >> 32 in r3 and a copy of retval in r4.
L(ret):	ld	r31, -8(r1)
	ld	r30, -16(r1)
ifdef(`HAVE_ABI_mode32',
`	srdi	r3, retval, 32
	mr	r4, retval
',`	mr	r3, retval')
	blr
EPILOGUE()