dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/pa64/aorslsh1_n.asm

dnl  PA64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).

dnl  Copyright 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		    cycles/limb
C 8000,8200:	      2
C 8500,8600,8700:     1.75

C TODO
C  * Write special feed-in code for each (n mod 8).  (See the ia64 code.)
C  * Try to make this run at closer to 1.5 c/l.
C  * Set up register aliases (define(`u0',`%r19')).
C  * Explicitly align loop.
dnl  INPUT PARAMETERS
define(`rp',`%r26')
define(`up',`%r25')
define(`vp',`%r24')
define(`n',`%r23')

C Operation selection.  ADCSBC is the carry-propagating add or subtract used
C for every limb.  INITC loads the initial value of the saved carry word:
C 0 for addition and 1 for subtraction.
ifdef(`OPERATION_addlsh1_n',`
  define(ADCSBC,	`add,dc')
  define(INITC,		`ldi	0,')
  define(func, mpn_addlsh1_n)
')
ifdef(`OPERATION_sublsh1_n',`
  define(ADCSBC,	`sub,db')
  define(INITC,		`ldi	1,')
  define(func, mpn_sublsh1_n)
')

MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)

C ABI selection.  RETREG is the register that receives the return value.
C CLRRET1 expands to nothing for 2.0w and clears %r28 for 2.0n.
ifdef(`HAVE_ABI_2_0w',`
  define(LEVEL,		`.level 2.0w')
  define(RETREG,	`%r28')
  define(CLRRET1,	`dnl')
')
ifdef(`HAVE_ABI_2_0n',`
  define(LEVEL,		`.level 2.0')
  define(RETREG,	`%r29')
  define(CLRRET1,	`ldi	0, %r28')
')

C ---------------------------------------------------------------------------
C func (mpn_addlsh1_n or mpn_sublsh1_n): rp[] = up[] +- (vp[] << 1)
C
C In:	rp (%r26)  destination limb pointer
C	up (%r25)  first source limb pointer
C	vp (%r24)  second source limb pointer, shifted left one bit
C	n  (%r23)  limb count
C Out:	RETREG = bit 63 of the last vp limb plus the final carry (add) or
C	borrow (sub).  For sub the raw result is negated before returning.
C
C Register roles:
C	%r1		carry saved as a 0/1 word while flags are clobbered
C	%r28		previous vp limb, supplies the bit shifted in
C	%r31,%r3-%r9	shifted vp limbs
C	%r19-%r22	up limbs (%r22 first holds the loop0 count)
C	%r29		result limb about to be stored
C
C Stack: a 0x100 byte frame is allocated on entry.  %r3 is always saved,
C %r4-%r9 are saved only on the unrolled path.
C
C Structure: loop0 handles (n mod 8) limbs one at a time, then the unrolled
C code handles 8 limbs per pass.  n = 0 is not handled: when loop0 is
C skipped the unrolled path always processes at least 8 limbs.
C
C The instruction after each branch is its delay slot.  The branches used
C for loop control (addib) disturb the carry, so the carry is saved into %r1
C before them and restored by the addi placed in the delay slot.
C ---------------------------------------------------------------------------

	LEVEL
PROLOGUE(func)
	std,ma		%r3, 0x100(%r30)	C save reg

	INITC		%r1			C init saved cy

C Primitive code for the first (n mod 8) limbs:
	extrd,u		n, 63, 3, %r22		C count for loop0
	comib,=		0, %r22, L(unrolled)	C skip loop0?
	copy		%r0, %r28		C delay slot: no previous vp limb yet
LDEF(loop0)
	ldd	0(vp), %r21
	ldo		8(vp), vp
	ldd	0(up), %r19
	ldo		8(up), up
	shrpd	%r21, %r28, 63, %r31		C (v << 1) | (previous v >> 63)
	addi		-1, %r1, %r0		C restore cy
	ADCSBC	%r19, %r31, %r29
	std	%r29, 0(rp)
	add,dc		%r0, %r0, %r1		C save cy
	copy	%r21, %r28			C remember v for the next limb
	addib,>		-1, %r22, L(loop0)
	ldo		8(rp), rp

C If at least 8 limbs remain continue with the unrolled code, otherwise
C n was less than 8 and everything is done.
	addib,>=	-8, n, L(unrolled)
	addi		-1, %r1, %r0		C restore cy

C Return path for n < 8.
	shrpd	%r0, %r28, 63, %r28		C bit shifted out of the last vp limb
	ADCSBC	%r0, %r28, RETREG
ifdef(`OPERATION_sublsh1_n',
`	sub	%r0, RETREG, RETREG')
	CLRRET1

	bve		(%r2)
	ldd,mb		-0x100(%r30), %r3


C Feed-in for the unrolled code: save %r4-%r9, load 8 vp limbs and shift
C them, load the first 4 up limbs.
LDEF(unrolled)
	std	%r4, -0xf8(%r30)		C save reg
	ldd	0(vp), %r4
	std	%r5, -0xf0(%r30)		C save reg
	ldd	8(vp), %r5
	std	%r6, -0xe8(%r30)		C save reg
	ldd	16(vp), %r6
	std	%r7, -0xe0(%r30)		C save reg

	ldd	24(vp), %r7
	shrpd	%r4, %r28, 63, %r31
	std	%r8, -0xd8(%r30)		C save reg
	ldd	32(vp), %r8
	shrpd	%r5, %r4, 63, %r4
	std	%r9, -0xd0(%r30)		C save reg
	ldd	40(vp), %r9
	shrpd	%r6, %r5, 63, %r5
	ldd	48(vp), %r3
	shrpd	%r7, %r6, 63, %r6
	ldd	56(vp), %r28
	shrpd	%r8, %r7, 63, %r7
	ldd	0(up), %r19
	shrpd	%r9, %r8, 63, %r8
	ldd	8(up), %r20
	shrpd	%r3, %r9, 63, %r9
	ldd	16(up), %r21
	shrpd	%r28, %r3, 63, %r3
	ldd	24(up), %r22

	nop					C alignment FIXME
	addib,<=	-8, n, L(end)
	addi		-1, %r1, %r0		C restore cy

C Main loop: combine and store the 8 limbs already in registers while
C loading and shifting the next 8.
LDEF(loop)
	ADCSBC	%r19, %r31, %r29
	ldd	32(up), %r19
	std	%r29, 0(rp)
	ADCSBC	%r20, %r4, %r29
	ldd	40(up), %r20
	std	%r29, 8(rp)
	ADCSBC	%r21, %r5, %r29
	ldd	48(up), %r21
	std	%r29, 16(rp)
	ADCSBC	%r22, %r6, %r29
	ldd	56(up), %r22
	std	%r29, 24(rp)
	ADCSBC	%r19, %r7, %r29
	ldd	64(vp), %r4
	std	%r29, 32(rp)
	ADCSBC	%r20, %r8, %r29
	ldd	72(vp), %r5
	std	%r29, 40(rp)
	ADCSBC	%r21, %r9, %r29
	ldd	80(vp), %r6
	std	%r29, 48(rp)
	ADCSBC	%r22, %r3, %r29
	std	%r29, 56(rp)

	add,dc		%r0, %r0, %r1		C save cy

	ldd	88(vp), %r7
	shrpd	%r4, %r28, 63, %r31
	ldd	96(vp), %r8
	shrpd	%r5, %r4, 63, %r4
	ldd	104(vp), %r9
	shrpd	%r6, %r5, 63, %r5
	ldd	112(vp), %r3
	shrpd	%r7, %r6, 63, %r6
	ldd	120(vp), %r28
	shrpd	%r8, %r7, 63, %r7
	ldd	64(up), %r19
	shrpd	%r9, %r8, 63, %r8
	ldd	72(up), %r20
	shrpd	%r3, %r9, 63, %r9
	ldd	80(up), %r21
	shrpd	%r28, %r3, 63, %r3
	ldd	88(up), %r22

	ldo		64(vp), vp
	ldo		64(rp), rp
	ldo		64(up), up
	addib,>		-8, n, L(loop)
	addi		-1, %r1, %r0		C restore cy

C Wind-down: combine and store the last 8 limbs, restoring the saved
C registers in the slots the loop uses for vp loads.
LDEF(end)
	ADCSBC	%r19, %r31, %r29
	ldd	32(up), %r19
	std	%r29, 0(rp)
	ADCSBC	%r20, %r4, %r29
	ldd	40(up), %r20
	std	%r29, 8(rp)
	ADCSBC	%r21, %r5, %r29
	ldd	48(up), %r21
	std	%r29, 16(rp)
	ADCSBC	%r22, %r6, %r29
	ldd	56(up), %r22
	std	%r29, 24(rp)
	ADCSBC	%r19, %r7, %r29
	ldd	-0xf8(%r30), %r4		C restore reg
	std	%r29, 32(rp)
	ADCSBC	%r20, %r8, %r29
	ldd	-0xf0(%r30), %r5		C restore reg
	std	%r29, 40(rp)
	ADCSBC	%r21, %r9, %r29
	ldd	-0xe8(%r30), %r6		C restore reg
	std	%r29, 48(rp)
	ADCSBC	%r22, %r3, %r29
	ldd	-0xe0(%r30), %r7		C restore reg
	std	%r29, 56(rp)

	shrpd	%r0, %r28, 63, %r28		C bit shifted out of the last vp limb
	ldd	-0xd8(%r30), %r8		C restore reg
	ADCSBC	%r0, %r28, RETREG
ifdef(`OPERATION_sublsh1_n',
`	sub	%r0, RETREG, RETREG')
	CLRRET1

	ldd	-0xd0(%r30), %r9		C restore reg
	bve		(%r2)
	ldd,mb		-0x100(%r30), %r3	C restore reg
EPILOGUE()