dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/aorsorrlshC_n.asm

dnl  PowerPC-64 mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n.

dnl  Copyright 2003, 2005, 2009, 2010, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

C                  cycles/limb
C POWER3/PPC630		1.83	(1.5 c/l should be possible)
C POWER4/PPC970		3	(2.0 c/l should be possible)
C POWER5		3
C POWER6		3.5-47
C POWER7		3

C STATUS
C  * Try combining upx+up, and vpx+vp.
C  * The worst case 47 c/l for POWER6 happens if the 3rd operand for ldx is
C    greater than the 2nd operand.  Yes, this addition is non-commutative wrt
C    performance.

C INPUT PARAMETERS
C   rp (r3)  base of the result limbs, written with std/stdu
C   up (r4)  base of the u operand limbs
C   vp (r5)  base of the v operand limbs, the operand that gets shifted
C   n  (r6)  limb count, moved into ctr
C
C LSH and RSH are not defined in this file and must be supplied before it is
C processed.  The code shifts each v limb left by LSH and takes the top bits
C of the previous v limb with a right shift by RSH, so RSH is presumably
C 64-LSH (to be confirmed against the including file).
define(`rp',	`r3')
define(`up',	`r4')
define(`vp',	`r5')
define(`n',	`r6')

C OPERATION SELECTION
C Exactly one of DO_add, DO_sub, DO_rsb is expected to be defined by the
C including file.  With s = shifted v limb and u = u limb:
C   ADDSUBC(d,s,u)  d = s op u, no carry in, sets CA
C   ADDSUBE(d,s,u)  d = s op u, with carry in, sets CA
C   INITCY(t)       puts CA in its neutral state via addic on r1, clobbers t
C                   (the -1 form sets CA only while r1 is nonzero)
C   RETVAL(h)       r3 = h adjusted by the final CA
C     add:  d = s + u,  r3 = h + CA
C     sub:  d = u - s,  r3 = h + 1 - CA
C     rsb:  d = s - u,  r3 = h + CA - 1
ifdef(`DO_add', `
  define(`ADDSUBC',	`addc	$1, $2, $3')
  define(`ADDSUBE',	`adde	$1, $2, $3')
  define(`INITCY',	`addic	$1, r1, 0')
  define(`RETVAL',	`addze	r3, $1')
  define(`func', mpn_addlsh`'LSH`'_n)')
ifdef(`DO_sub', `
  define(`ADDSUBC',	`subfc	$1, $2, $3')
  define(`ADDSUBE',	`subfe	$1, $2, $3')
  define(`INITCY',	`addic	$1, r1, -1')
  define(`RETVAL',	`subfze	r3, $1
	neg	r3, r3')
  define(`func', mpn_sublsh`'LSH`'_n)')
ifdef(`DO_rsb', `
  define(`ADDSUBC',	`subfc	$1, $3, $2')
  define(`ADDSUBE',	`subfe	$1, $3, $2')
  define(`INITCY',	`addic	$1, r1, -1')
  define(`RETVAL',	`addme	r3, $1')
  define(`func', mpn_rsblsh`'LSH`'_n)')

C SECOND STREAM POINTERS, used only by the unrolled path.  rpx shares r6
C with n, which is dead once ctr has been loaded.
define(`rpx',	`r6')
define(`upx',	`r7')
define(`vpx',	`r12')

C WORKING REGISTERS
C   acc0, acc1      shifted v limb being assembled, then the result limb
C   ulimb           current u limb
C   vlimb0, vlimb1  the two most recently loaded v limbs
define(`acc0',	`r0')
define(`acc1',	`r9')
define(`ulimb',	`r8')
define(`vlimb0',	`r10')
define(`vlimb1',	`r11')


ASM_START()
PROLOGUE(func)
	cmpldi	cr0, n, 13
	bgt	L(big)			C more than 13 limbs: unrolled path

C Short operands: one limb per half iteration, ctr counts limbs.
C n = 0 is not handled specially here; presumably callers pass n >= 1.
	mtctr	n			C ctr = limb count
	INITCY(r0)			C neutral CA, r0 is only scratch here

	ld	vlimb0, 0(vp)		C first v limb
	ld	ulimb, 0(up)		C first u limb
	addi	up, up, -8		C bias up for the 16(up) and 8(up) accesses
	addi	rp, rp, -8		C bias rp for the 8(rp) and 16(rp) accesses
	sldi	acc1, vlimb0, LSH	C lowest limb has no bits shifted in
	bdz	L(ex1)			C single limb: only the tail is needed

	ALIGN(16)
L(lo0):	ld	vlimb1, 8(vp)		C next v limb
	ADDSUBE(acc1, acc1, ulimb)	C finish pending limb, carry in and out
	ldu	ulimb, 16(up)		C next u limb, advance up
	srdi	acc0, vlimb0, RSH	C top bits of the previous v limb
	std	acc1, 8(rp)		C store result limb
	rldimi	acc0, vlimb1, LSH, 0	C merge in the new v limb shifted left
	bdz	L(ex0)			C out of limbs: finish with acc0
	ldu	vlimb0, 16(vp)		C next v limb, advance vp
	ADDSUBE(acc0, acc0, ulimb)	C finish pending limb, carry in and out
	ld	ulimb, 8(up)		C next u limb
	srdi	acc1, vlimb1, RSH	C top bits of the previous v limb
	stdu	acc0, 16(rp)		C store result limb, advance rp
	rldimi	acc1, vlimb0, LSH, 0	C merge in the new v limb shifted left
	bdnz	L(lo0)			C more limbs: go round again

C Tails for the short path.  The last result limb goes through r7, and the
C bits shifted out of the last v limb are combined with CA for the return.
L(ex1):	ADDSUBE(r7, acc1, ulimb)
	std	r7, 8(rp)		C last result limb
	srdi	r0, vlimb0, RSH		C bits shifted out of the top v limb
	RETVAL(r0)
	blr
L(ex0):	ADDSUBE(r7, acc0, ulimb)
	std	r7, 16(rp)		C last result limb
	srdi	r0, vlimb1, RSH		C bits shifted out of the top v limb
	RETVAL(r0)
	blr


C Long operands: two limbs per iteration over two interleaved streams.
C ctr = (n-1) >> 1, and the parity of n picks the entry point.
L(big):	rldicl.	r0, n, 0,63		C r0 = n & 1, result recorded in cr0
	addi	r6, n, -1		C n - 1 ...
	srdi	r6, r6, 1		C ... halved
	mtctr	r6			C ctr = (n-1) >> 1
	beq	cr0, L(b0)		C even n: start at L(b0)

C Odd n.  The first limb is started with ADDSUBC, so no INITCY is needed.
C up, vp, upx and vpx are rewritten as offsets from rp and rpx, which lets
C the stdu updates of rp and rpx step the ldx loads as well.
L(b1):	ld	vlimb1, 0(vp)
	ld	ulimb, 0(up)
	sldi	acc1, vlimb1, LSH
	srdi	acc0, vlimb1, RSH
	ld	vlimb0, 8(vp)
	ADDSUBC(acc1, acc1, ulimb)	C first limb, no carry in, sets CA
	addi	rpx, rp, -16
	addi	rp, rp, -8
	sub	upx, up, rp
	sub	vpx, vp, rp
	sub	up, up, rpx
	sub	vp, vp, rpx
	addi	up, up, 8
	addi	upx, upx, 16
	addi	vp, vp, 16
	addi	vpx, vpx, 24
	b	L(mid)			C enter the loop at its second half

C Even n.  Same setup with the roles of the two streams exchanged; falls
C through into the top of the loop.
L(b0):	ld	vlimb0, 0(vp)
	ld	ulimb, 0(up)
	sldi	acc0, vlimb0, LSH
	srdi	acc1, vlimb0, RSH
	ld	vlimb1, 8(vp)
	ADDSUBC(acc0, acc0, ulimb)	C first limb, no carry in, sets CA
	addi	rpx, rp, -8
	addi	rp, rp, -16
	sub	upx, up, rpx
	sub	vpx, vp, rpx
	sub	up, up, rp
	sub	vp, vp, rp
	addi	up, up, 8
	addi	upx, upx, 16
	addi	vp, vp, 16
	addi	vpx, vpx, 24

	ALIGN(32)
L(top):	ldx	ulimb, rp, up		C u limb for the acc1 half
	ldx	vlimb0, rp, vp		C v limb used by the next half
	rldimi	acc1, vlimb1, LSH, 0	C complete the shifted v limb
	stdu	acc0, 16(rp)		C store previous result, advance rp
	srdi	acc0, vlimb1, RSH	C start the following shifted limb
	ADDSUBE(acc1, acc1, ulimb)	C carry in and out
L(mid):	ldx	ulimb, rpx, upx		C u limb for the acc0 half
	ldx	vlimb1, rpx, vpx	C v limb used by the next half
	rldimi	acc0, vlimb0, LSH, 0	C complete the shifted v limb
	stdu	acc1, 16(rpx)		C store previous result, advance rpx
	srdi	acc1, vlimb0, RSH	C start the following shifted limb
	ADDSUBE(acc0, acc0, ulimb)	C carry in and out
	bdnz	L(top)			C loop while ctr is nonzero

C Wind down: one more u limb is consumed and the last two results stored.
	ldx	ulimb, rp, up
	rldimi	acc1, vlimb1, LSH, 0
	std	acc0, 16(rp)
	srdi	acc0, vlimb1, RSH	C bits shifted out of the top v limb
	ADDSUBE(acc1, acc1, ulimb)	C final limb, carry in and out
	std	acc1, 24(rp)

	RETVAL(acc0)
	blr
EPILOGUE()