dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/ev6/nails/aors_n.asm

dnl  Alpha ev6 nails mpn_add_n and mpn_sub_n.

dnl  Copyright 2002, 2006 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


dnl  Runs at 2.5 cycles/limb.  It would be possible to reach 2.0 cycles/limb
dnl  with 8-way unrolling.

include(`../config.m4')

dnl  INPUT PARAMETERS
dnl    rp  limb vector that receives the result (only ever written, with stq)
dnl    up  first source limb vector (only ever read, with ldq)
dnl    vp  second source limb vector (only ever read, with ldq)
dnl    n   number of limbs to process
dnl  NOTE(review): there is no dedicated path for n = 0; with n = 0 the code
dnl  falls into the four-limb block and loads/stores four limbs.  n >= 1 is
dnl  presumably a caller-side precondition -- confirm against the callers.
define(`rp',`r16')
define(`up',`r17')
define(`vp',`r18')
define(`n',`r19')

dnl  Result limbs in flight.  Each one first receives the raw sum/difference
dnl  of a limb pair and then has the incoming carry/borrow applied to it.
define(`rl0',`r0')
define(`rl1',`r1')
define(`rl2',`r2')
define(`rl3',`r3')

dnl  Limbs loaded from up.
define(`ul0',`r4')
define(`ul1',`r5')
define(`ul2',`r6')
define(`ul3',`r7')

dnl  Limbs loaded from vp.  Note that r25 (vl3) is also used as the iteration
dnl  counter of the one-limb-at-a-time loop, before vl3 is first loaded.
define(`vl0',`r22')
define(`vl1',`r23')
define(`vl2',`r24')
define(`vl3',`r25')

dnl  All-ones shifted right by NAIL_BITS; every result limb is ANDed with this
dnl  mask before it is stored.
define(`numb_mask',`r21')

dnl  CYSH is the right-shift count that moves the carry/borrow out of a result
dnl  limb into r20.  This is the default; it is set again per operation below.
define(`NAIL_BITS',`GMP_NAIL_BITS')
define(`CYSH',`GMP_NUMB_BITS')

dnl  This declaration is munged by configure
NAILS_SUPPORT(1-63)

dnl  Operation selection, keyed on OPERATION_add_n / OPERATION_sub_n.
dnl  NOTE(review): presumably exactly one of the two is defined by the build;
dnl  nothing in this file enforces that.
dnl    add_n: OP is addq and the carry is taken from bit GMP_NUMB_BITS upward.
dnl           This assumes the nail bits of the source limbs are zero, so the
dnl           shifted value is 0 or 1 -- TODO confirm.
dnl    sub_n: OP is subq and the borrow is taken from bit 63, which is set
dnl           when the 64-bit difference went negative.
ifdef(`OPERATION_add_n', `
        define(`OP', addq)
        define(`CYSH',`GMP_NUMB_BITS')
        define(`func', mpn_add_n)')
ifdef(`OPERATION_sub_n', `
        define(`OP', subq)
        define(`CYSH',63)
        define(`func', mpn_sub_n)')

dnl  NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably
dnl  it names the entry points this single source can be built into.
MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)

ASM_START()
C  func (rp, up, vp, n)
C
C  For each limb i below n: rp[i] = (up[i] OP vp[i] OP carry) AND numb_mask,
C  with the carry/borrow out of limb i fed into limb i+1.  The carry/borrow
C  out of the last limb is returned in r0, reduced to a single bit.
C
C  r20 holds the running carry/borrow for the whole routine.
C
C  Layout of the code:
C    lp0       handles n mod 4 limbs, one per iteration
C    ge4       loads the first group of four limb pairs
C    ge8       first pipelined group, entered when at least 8 limbs remained
C    top       steady-state loop, four limbs per iteration
C    end, cj0  wind-down: finish the last group, no further loads
C
C  The trailing main-add / cy-add / gen cy notes name the pipeline stage and
C  the limb index within the group; they apply to the subtract build as well.
PROLOGUE(func)
        lda     numb_mask, -1(r31)              C numb_mask = all ones
        srl     numb_mask, NAIL_BITS, numb_mask C clear the top NAIL_BITS bits
        bis     r31, r31, r20                   C incoming carry/borrow = 0

        and     n, 3, r25                       C r25 = n mod 4
        lda     n, -4(n)                        C n is kept biased by -4
        beq     r25, L(ge4)                     C nothing to do singly

C  One limb per iteration, r25 iterations.
L(lp0): ldq     ul0, 0(up)
        lda     up, 8(up)
        ldq     vl0, 0(vp)
        lda     vp, 8(vp)
        lda     rp, 8(rp)
        lda     r25, -1(r25)
        OP      ul0, vl0, rl0                   C raw sum/difference
        OP      rl0, r20, rl0                   C apply incoming carry/borrow
        and     rl0, numb_mask, r28             C drop the nail bits
        stq     r28, -8(rp)                     C rp was advanced above
        srl     rl0, CYSH, r20                  C carry/borrow out
        bne     r25, L(lp0)

        blt     n, L(ret)                       C biased n < 0: n was below 4, done

C  At least four limbs remain.  Load one full group and advance the sources.
L(ge4): ldq     ul0, 0(up)
        ldq     vl0, 0(vp)
        ldq     ul1, 8(up)
        ldq     vl1, 8(vp)
        ldq     ul2, 16(up)
        ldq     vl2, 16(vp)
        ldq     ul3, 24(up)
        ldq     vl3, 24(vp)
        lda     up, 32(up)
        lda     vp, 32(vp)
        lda     n, -4(n)
        bge     n, L(ge8)                       C another full group follows

C  The group just loaded is the last one.  Process limbs 0 and 1 here, then
C  join the wind-down code.  rp has not been advanced on this path, which
C  matches the non-negative store offsets used from cj0 onward.
        OP      ul0, vl0, rl0                   C main-add 0
        OP      rl0, r20, rl0                   C cy-add 0
        OP      ul1, vl1, rl1                   C main-add 1
        srl     rl0, CYSH, r20                  C gen cy 0
        OP      rl1, r20, rl1                   C cy-add 1
        and     rl0,numb_mask, r27
        br      r31, L(cj0)

C  First pipelined group.  Each source register is reloaded with the limb
C  for the next group as soon as its current value has been consumed.  Only
C  limbs 0 and 1 of this group are stored here; limbs 2 and 3 are left in
C  r27 and rl3 and are stored by top or end, after rp has been advanced.
L(ge8): OP      ul0, vl0, rl0                   C main-add 0
        ldq     ul0, 0(up)
        ldq     vl0, 0(vp)
        OP      rl0, r20, rl0                   C cy-add 0
        OP      ul1, vl1, rl1                   C main-add 1
        srl     rl0, CYSH, r20                  C gen cy 0
        ldq     ul1, 8(up)
        ldq     vl1, 8(vp)
        OP      rl1, r20, rl1                   C cy-add 1
        and     rl0,numb_mask, r27
        OP      ul2, vl2, rl2                   C main-add 2
        srl     rl1, CYSH, r20                  C gen cy 1
        ldq     ul2, 16(up)
        ldq     vl2, 16(vp)
        OP      rl2, r20, rl2                   C cy-add 2
        and     rl1,numb_mask, r28
        stq     r27, 0(rp)
        OP      ul3, vl3, rl3                   C main-add 3
        srl     rl2, CYSH, r20                  C gen cy 2
        ldq     ul3, 24(up)
        ldq     vl3, 24(vp)
        OP      rl3, r20, rl3                   C cy-add 3
        and     rl2,numb_mask, r27
        stq     r28, 8(rp)
        lda     rp, 32(rp)
        lda     up, 32(up)
        lda     vp, 32(vp)
        lda     n, -4(n)
        blt     n, L(end)                       C the group just loaded is the last

C  Steady-state loop, four limbs per iteration, software pipelined:
C    - the limbs combined in an iteration were loaded one iteration earlier,
C    - the loads issued in an iteration feed the next one,
C    - stores lag: -16(rp) and -8(rp) receive limbs 2 and 3 of the previous
C      group, 0(rp) and 8(rp) receive limbs 0 and 1 of the current group.
C  bis r31, r31, r31 writes the zero register and so has no effect.
C  NOTE(review): presumably these are padding that keeps the instructions in
C  aligned groups of four for ev6 issue -- not verifiable from this file.
        ALIGN(32)
L(top): OP      ul0, vl0, rl0                   C main-add 0
        srl     rl3, CYSH, r20                  C gen cy 3
        ldq     ul0, 0(up)
        ldq     vl0, 0(vp)

        OP      rl0, r20, rl0                   C cy-add 0
        and     rl3,numb_mask, r28
        stq     r27, -16(rp)
        bis     r31, r31, r31

        OP      ul1, vl1, rl1                   C main-add 1
        srl     rl0, CYSH, r20                  C gen cy 0
        ldq     ul1, 8(up)
        ldq     vl1, 8(vp)

        OP      rl1, r20, rl1                   C cy-add 1
        and     rl0,numb_mask, r27
        stq     r28, -8(rp)
        bis     r31, r31, r31

        OP      ul2, vl2, rl2                   C main-add 2
        srl     rl1, CYSH, r20                  C gen cy 1
        ldq     ul2, 16(up)
        ldq     vl2, 16(vp)

        OP      rl2, r20, rl2                   C cy-add 2
        and     rl1,numb_mask, r28
        stq     r27, 0(rp)
        bis     r31, r31, r31

        OP      ul3, vl3, rl3                   C main-add 3
        srl     rl2, CYSH, r20                  C gen cy 2
        ldq     ul3, 24(up)
        ldq     vl3, 24(vp)

        OP      rl3, r20, rl3                   C cy-add 3
        and     rl2,numb_mask, r27
        stq     r28, 8(rp)
        bis     r31, r31, r31

        bis     r31, r31, r31
        lda     n, -4(n)
        lda     up, 32(up)
        lda     vp, 32(vp)

        bis     r31, r31, r31
        bis     r31, r31, r31
        lda     rp, 32(rp)
        bge     n, L(top)

C  Wind-down.  Same computation as one pass of top but without loads: store
C  limbs 2 and 3 of the previous group, then process and store the final
C  group that is already held in registers.
L(end): OP      ul0, vl0, rl0                   C main-add 0
        srl     rl3, CYSH, r20                  C gen cy 3
        OP      rl0, r20, rl0                   C cy-add 0
        and     rl3,numb_mask, r28
        stq     r27, -16(rp)
        OP      ul1, vl1, rl1                   C main-add 1
        srl     rl0, CYSH, r20                  C gen cy 0
        OP      rl1, r20, rl1                   C cy-add 1
        and     rl0,numb_mask, r27
        stq     r28, -8(rp)
C  The short path from ge4 joins here with limb 0 masked in r27 and limb 1
C  carry-adjusted in rl1.
L(cj0): OP      ul2, vl2, rl2                   C main-add 2
        srl     rl1, CYSH, r20                  C gen cy 1
        OP      rl2, r20, rl2                   C cy-add 2
        and     rl1,numb_mask, r28
        stq     r27, 0(rp)
        OP      ul3, vl3, rl3                   C main-add 3
        srl     rl2, CYSH, r20                  C gen cy 2
        OP      rl3, r20, rl3                   C cy-add 3
        and     rl2,numb_mask, r27
        stq     r28, 8(rp)

        srl     rl3, CYSH, r20                  C gen cy 3
        and     rl3,numb_mask, r28
        stq     r27, 16(rp)
        stq     r28, 24(rp)

C  Return the final carry/borrow as a single bit in r0.
L(ret): and     r20, 1, r0
        ret     r31, (r26), 1                   C return through the address in r26
EPILOGUE()
ASM_END()