dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/ev6/nails/addmul_4.asm

dnl  Alpha ev6 nails mpn_addmul_4.

dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C Runs at 2.5 cycles/limb.

C We should go for 2-way unrolling over 17 cycles, for 2.125 c/l corresponding
C to 3.24 insn/cycle.
C  mpn_addmul_4 -- multiply the n limbs at up by the 4 limbs at vp and add
C  the n limbs read from rp.
C
C  The low n+3 result limbs are stored at rp[0] .. rp[n+2], each masked to
C  its numb part.  The final accumulator, which is the most significant
C  limb, is returned in r0 and is not masked with numb_mask.  Only rp[0] ..
C  rp[n-1] are read; rp[n] .. rp[n+2] are written without being read.
C
C  n is decremented before it is first tested, so this code assumes n >= 1.
C
C  Nail handling: each v limb is shifted left by NAIL_BITS once up front.
C  For every product the umulh half is then used as is, while the mulq half
C  is shifted right by NAIL_BITS before it is accumulated.  Bits above
C  NUMB_BITS in a column sum are carried into the next column through r19.

C  INPUT PARAMETERS
define(`rp',`r16')
define(`up',`r17')
define(`n',`r18')
define(`vp',`r19')

C  Useful register aliases
define(`numb_mask',`r24')
define(`ulimb',`r25')
define(`rlimb',`r27')

C  mXa receives the mulq result and mXb the umulh result of vX * ulimb
define(`m0a',`r0')
define(`m0b',`r1')
define(`m1a',`r2')
define(`m1b',`r3')
define(`m2a',`r20')
define(`m2b',`r21')
define(`m3a',`r12')
define(`m3b',`r13')

C  Column accumulators
define(`acc0',`r4')
define(`acc1',`r5')
define(`acc2',`r22')
define(`acc3',`r14')

C  The four multiplier limbs loaded from vp, pre-shifted by NAIL_BITS
define(`v0',`r6')
define(`v1',`r7')
define(`v2',`r23')
define(`v3',`r15')

C Used for temps: r8 r19 r28
C r19 is also vp.  It is reused as the nail carry once v0..v3 are loaded.

define(`NAIL_BITS',`GMP_NAIL_BITS')
define(`NUMB_BITS',`GMP_NUMB_BITS')

C  This declaration is munged by configure
NAILS_SUPPORT(4-63)

ASM_START()
PROLOGUE(mpn_addmul_4)
C  r12..r15 are used below as m3a, m3b, acc3 and v3, so they are saved here
C  and reloaded before the return.
C  NOTE(review): only offsets 32..56 of the 240 byte frame are used in this
C  routine.  The frame size is kept as found; confirm before shrinking it.
	lda	r30,	-240(r30)
	stq	r12,	32(r30)
	stq	r13,	40(r30)
	stq	r14,	48(r30)
	stq	r15,	56(r30)

C  numb_mask = all ones shifted right by NAIL_BITS
	lda	numb_mask,-1(r31)
	srl	numb_mask,NAIL_BITS,numb_mask

	ldq	v0,	0(vp)
	ldq	v1,	8(vp)
	ldq	v2,	16(vp)
	ldq	v3,	24(vp)

	bis	r31,	r31,	acc0		C zero acc0
	sll	v0,NAIL_BITS,	v0
	bis	r31,	r31,	acc1		C zero acc1
	sll	v1,NAIL_BITS,	v1
	bis	r31,	r31,	acc2		C zero acc2
	sll	v2,NAIL_BITS,	v2
	bis	r31,	r31,	acc3		C zero acc3
	sll	v3,NAIL_BITS,	v3
	bis	r31,	r31,	r19		C zero nail carry, vp is dead now

C  Start the four products for up[0] before entering the loop.  Each loop
C  pass consumes the products started one step earlier.
	ldq	ulimb,	0(up)
	lda	up,	8(up)
	mulq	v0,	ulimb,	m0a		C U1
	umulh	v0,	ulimb,	m0b		C U1
	mulq	v1,	ulimb,	m1a		C U1
	umulh	v1,	ulimb,	m1b		C U1
	lda	n,	-1(n)
	mulq	v2,	ulimb,	m2a		C U1
	umulh	v2,	ulimb,	m2b		C U1
	mulq	v3,	ulimb,	m3a		C U1
	umulh	v3,	ulimb,	m3b		C U1
	beq	n,	L(end)			C U0

C  Main loop: one rp limb is finished per pass while the products for the
C  next up limb are started.  The instruction order and the nops are kept
C  exactly as scheduled.  The U0/U1/L0/L1 tags are presumably ev6 issue slot
C  annotations -- TODO confirm.
	ALIGN(16)
L(top):	bis	r31,	r31,	r31		C U1 nop
	ldq	rlimb,	0(rp)			C L0
	ldq	ulimb,	0(up)			C L1
	addq	r19,	acc0,	acc0		C U0 propagate nail

	bis	r31,	r31,	r31		C L0 nop
	bis	r31,	r31,	r31		C U1 nop
	bis	r31,	r31,	r31		C L1 nop
	bis	r31,	r31,	r31		C U0 nop

	lda	rp,	8(rp)			C L0
	srl	m0a,NAIL_BITS,	r8		C U0
	lda	up,	8(up)			C L1
	mulq	v0,	ulimb,	m0a		C U1

	addq	r8,	acc0,	r19		C U0
	addq	m0b,	acc1,	acc0		C L0
	umulh	v0,	ulimb,	m0b		C U1
	bis	r31,	r31,	r31		C L1 nop

	addq	rlimb,	r19,	r19		C L0
	srl	m1a,NAIL_BITS,	r8		C U0
	bis	r31,	r31,	r31		C L1 nop
	mulq	v1,	ulimb,	m1a		C U1

	addq	r8,	acc0,	acc0		C U0
	addq	m1b,	acc2,	acc1		C L0
	umulh	v1,	ulimb,	m1b		C U1
	and	r19,numb_mask,	r28		C L1 extract numb part

	bis	r31,	r31,	r31		C L0 nop
	srl	m2a,NAIL_BITS,	r8		C U0
	lda	n,	-1(n)			C L1
	mulq	v2,	ulimb,	m2a		C U1

	addq	r8,	acc1,	acc1		C L1
	addq	m2b,	acc3,	acc2		C L0
	umulh	v2,	ulimb,	m2b		C U1
	srl	r19,NUMB_BITS,	r19		C U0 extract nail part

	bis	r31,	r31,	r31		C L0 nop
	srl	m3a,NAIL_BITS,	r8		C U0
	stq	r28,	-8(rp)			C L1
	mulq	v3,	ulimb,	m3a		C U1

	addq	r8,	acc2,	acc2		C L0
	bis	r31,	m3b,	acc3		C L1
	umulh	v3,	ulimb,	m3b		C U1
	bne	n,	L(top)			C U0

C  Tail: fold in the last set of products without starting new ones.  The
C  former rp increment marked FIXME DELETE has been removed; the four stores
C  below use offsets 0, 8, 16, 24 from the unmodified rp in place of
C  -8, 0, 8, 16 from the incremented one, so the same addresses are written.
C  rp is not read again after this point.
L(end):	ldq	rlimb,	0(rp)
	addq	r19,	acc0,	acc0		C propagate nail
	srl	m0a,NAIL_BITS,	r8		C U0
	addq	r8,	acc0,	r19
	addq	m0b,	acc1,	acc0
	addq	rlimb,	r19,	r19
	srl	m1a,NAIL_BITS,	r8		C U0
	addq	r8,	acc0,	acc0
	addq	m1b,	acc2,	acc1
	and	r19,numb_mask,	r28		C extract limb
	srl	m2a,NAIL_BITS,	r8		C U0
	addq	r8,	acc1,	acc1
	addq	m2b,	acc3,	acc2
	srl	r19,NUMB_BITS,	r19		C extract nail
	srl	m3a,NAIL_BITS,	r8		C U0
	stq	r28,	0(rp)
	addq	r8,	acc2,	acc2
	bis	r31,	m3b,	acc3

C  Flush the accumulators: store the numb part of each column and carry
C  the bits above NUMB_BITS into the next one.
	addq	r19,	acc0,	acc0		C propagate nail
	and	acc0,numb_mask,	r28
	stq	r28,	8(rp)
	srl	acc0,NUMB_BITS,	r19
	addq	r19,	acc1,	acc1

	and	acc1,numb_mask,	r28
	stq	r28,	16(rp)
	srl	acc1,NUMB_BITS,	r19
	addq	r19,	acc2,	acc2

	and	acc2,numb_mask,	r28
	stq	r28,	24(rp)
	srl	acc2,NUMB_BITS,	r19
	addq	r19,	acc3,	r0		C return value, not masked

	ldq	r12,	32(r30)
	ldq	r13,	40(r30)
	ldq	r14,	48(r30)
	ldq	r15,	56(r30)
	lda	r30,	240(r30)
	ret	r31,	(r26),	1
EPILOGUE()
ASM_END()