dnl  IA-64 mpn_add_n_sub_n -- mpn parallel addition and subtraction.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C           cycles/limb
C Itanium:      ?
C Itanium 2:    2.25

C OVERVIEW
C
C For each of the n limbs (8 bytes each) this routine forms both u + v and
C u - v in a single pass over the operands:
C
C   [sp] receives the limbs of u + v
C   [dp] receives the limbs of u - v
C
C Result in r8:  2 * (carry out of the last addition limb)
C              + 1 * (borrow out of the last subtraction limb)
C
C n = 0 is not handled: n mod 4 = 0 dispatches to L(b0), which always
C processes four limbs.
C
C ar.lc is saved in r2 on entry and restored before returning.
C
C PREDICATE MAP (limb index k is the position inside the current group of 4)
C
C   p6  p7  p8  p9     carry  out of sum        limb 0, 1, 2, 3
C   p10 p11 p12 p13    borrow out of difference limb 0, 1, 2, 3
C
C CARRY FIX-UP IDIOM
C
C   cmpltu  pk, p0 = sk, vk     raw carry:  sum wrapped below an addend
C   (pj) cmpeqor pk, p0 = -1, sk  carry-in pj into an all-ones sum also
C                                 produces a carry-out
C   (pj) add     sk = 1, sk       apply the carry-in
C
C The difference uses the same shape with  cmpltu pk = uk, vk,  a compare
C against 0 and an add of -1.
C
C NOTE(review): cmpltu and cmpeqor are not defined in this file.  They are
C presumably cmp.ltu and cmp.eq.or supplied through config.m4 -- confirm.
C
C The trailing tags in the per-instruction comments (M I, I0, M01, M23, B)
C are the issue-unit annotations carried over from the original source.

C INPUT PARAMETERS
define(`sp', `r32')
define(`dp', `r33')
define(`up', `r34')
define(`vp', `r35')
define(`n',  `r36')

C Some useful aliases for registers we use
define(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')
define(`v0',`r20') define(`v1',`r21') define(`v2',`r22') define(`v3',`r23')
define(`s0',`r24') define(`s1',`r25') define(`s2',`r26') define(`s3',`r27')
define(`d0',`r28') define(`d1',`r29') define(`d2',`r30') define(`d3',`r31')
define(`up0',`up')
define(`up1',`r14')
define(`vp0',`vp')
define(`vp1',`r15')


ASM_START()
PROLOGUE(mpn_add_n_sub_n)
        .prologue
        .save   ar.lc, r2
        .body
C With HAVE_ABI_32 the four pointer arguments go through addp4 and n through
C zxt4 before use.
ifdef(`HAVE_ABI_32',`
        addp4   sp = 0, sp              C M I
        addp4   dp = 0, dp              C M I
        nop.i   0
        addp4   up = 0, up              C M I
        addp4   vp = 0, vp              C M I
        zxt4    n = n                   C I
        ;;
')

C Setup: r9 = n mod 4 selects the entry point, ar.lc = (n - 2) >> 2 is the
C loop count, up1/vp1 run one limb ahead of up0/vp0 so that each pointer can
C step by 16, and r10/r11 are prefetch pointers 256 bytes into u and v.
        and     r9 = 3, n               C M I
        mov.i   r2 = ar.lc              C I0    save caller ar.lc
        add     up1 = 8, up0            C M I
        add     vp1 = 8, vp0            C M I
        add     r8 = -2, n              C M I
        add     r10 = 256, up           C M I
        ;;
        shr.u   r8 = r8, 2              C I0
        cmp.eq  p10, p0 = 0, r9         C M I
        cmp.eq  p11, p0 = 2, r9         C M I
        cmp.eq  p12, p0 = 3, r9         C M I
        add     r11 = 256, vp           C M I
        ;;
        mov.i   ar.lc = r8              C I0
  (p10) br      L(b0)                   C B
  (p11) br      L(b2)                   C B
  (p12) br      L(b3)                   C B

C n mod 4 = 1: handle one limb in the limb-3 slot.  p15 is set when n <= 4,
C which on this path means n = 1, and then only the tail at L(cj1) remains.
L(b1):  ld8     u3 = [up0], 8           C M01
        add     up1 = 8, up1            C M I
        cmpltu  p14, p15 = 4, n         C M I
        ld8     v3 = [vp0], 8           C M01
        add     vp1 = 8, vp1            C M I
        ;;
        add     s3 = u3, v3             C M I
        sub     d3 = u3, v3             C M I
        mov     r8 = 0                  C M I   return value starts at 0
        ;;
        cmpltu  p9, p0 = s3, v3         C M I   carry from add3
        cmpltu  p13, p0 = u3, v3        C M I   borrow from sub3
  (p15) br      L(cj1)                  C B
        st8     [sp] = s3, 8            C M23
        st8     [dp] = d3, 8            C M23
        br      L(c0)                   C B

C n mod 4 = 0: no limb handled yet, so clear the incoming carry and borrow.
L(b0):  cmp.ne  p9, p0 = r0, r0         C M I
        cmp.ne  p13, p0 = r0, r0        C M I
C Common entry for a full group of four limbs: load all four, form the raw
C sums and differences of limbs 0 and 1, then enter the loop or the wind-down.
L(c0):  ld8     u0 = [up0], 16          C M01
        ld8     u1 = [up1], 16          C M01
        ;;
        ld8     v0 = [vp0], 16          C M01
        ld8     v1 = [vp1], 16          C M01
        ;;
        ld8     u2 = [up0], 16          C M01
        ld8     u3 = [up1], 16          C M01
        ;;
        ld8     v2 = [vp0], 16          C M01
        ld8     v3 = [vp1], 16          C M01
        ;;
        add     s0 = u0, v0             C M I
        add     s1 = u1, v1             C M I
        sub     d0 = u0, v0             C M I
        sub     d1 = u1, v1             C M I
        ;;
        cmpltu  p6, p0 = s0, v0         C M I   carry from add0
        cmpltu  p7, p0 = s1, v1         C M I   carry from add1
        cmpltu  p10, p0 = u0, v0        C M I   borrow from sub0
        cmpltu  p11, p0 = u1, v1        C M I   borrow from sub1
        ;;
        nop     0                       C
        br.cloop.dptk   L(top)          C B
        br      L(end)                  C B

C n mod 4 = 3: handle one limb in the limb-1 slot, store it, and continue
C with the two-limb entry at L(c2).
L(b3):  ld8     u1 = [up0], 8           C M01
        add     up1 = 8, up1            C M I
        ld8     v1 = [vp0], 8           C M01
        ;;
        add     vp1 = 8, vp1            C M I
        add     s1 = u1, v1             C M I
        sub     d1 = u1, v1             C M I
        ;;
        cmpltu  p7, p0 = s1, v1         C M I   carry from add1
        cmpltu  p11, p0 = u1, v1        C M I   borrow from sub1
        ;;
        st8     [sp] = s1, 8            C M23
        st8     [dp] = d1, 8            C M23
        br      L(c2)                   C B

        ALIGN(32)
C n mod 4 = 2: no limb handled yet, so clear the carry and borrow that would
C feed limb 2.
L(b2):  cmp.ne  p7, p0 = r0, r0         C M I
        cmp.ne  p11, p0 = r0, r0        C M I
        nop     0
C Two-limb entry: load limbs 2 and 3.  If more than 4 limbs were requested
C go to L(gt4) and join the loop, otherwise finish in the tail at L(cj2).
L(c2):  ld8     u2 = [up0], 16          C M01
        ld8     u3 = [up1], 16          C M01
        cmpltu  p14, p0 = 4, n          C M I
        ;;
        ld8     v2 = [vp0], 16          C M01
        ld8     v3 = [vp1], 16          C M01
  (p14) br      L(gt4)                  C B
        ;;
        add     s2 = u2, v2             C M I
        add     s3 = u3, v3             C M I
        sub     d2 = u2, v2             C M I
        sub     d3 = u3, v3             C M I
        ;;
        cmpltu  p8, p0 = s2, v2         C M I   carry from add2
        cmpltu  p9, p0 = s3, v3         C M I   carry from add3
        cmpltu  p12, p0 = u2, v2        C M I   borrow from sub2
        cmpltu  p13, p0 = u3, v3        C M I   borrow from sub3
        br      L(cj2)                  C B
        ;;
C More than 4 limbs on the two-limb entry: preload the next limbs 0 and 1,
C form the raw results for limbs 2 and 3, and enter the loop at its midpoint.
L(gt4): ld8     u0 = [up0], 16          C M01
        ld8     u1 = [up1], 16          C M01
        ;;
        ld8     v0 = [vp0], 16          C M01
        ld8     v1 = [vp1], 16          C M01
        ;;
        add     s2 = u2, v2             C M I
        add     s3 = u3, v3             C M I
        sub     d2 = u2, v2             C M I
        sub     d3 = u3, v3             C M I
        ;;
        cmpltu  p8, p0 = s2, v2         C M I   carry from add2
        cmpltu  p9, p0 = s3, v3         C M I   carry from add3
        cmpltu  p12, p0 = u2, v2        C M I   borrow from sub2
        cmpltu  p13, p0 = u3, v3        C M I   borrow from sub3
        br.cloop.dptk   L(mid)          C B

        ALIGN(32)
C Main loop, four limbs per iteration in two halves.
C First half: load the next limbs 0 and 1, apply the pending carry and borrow
C to the current limbs 0 and 1 and store them, and form the raw sums and
C differences of limbs 2 and 3.
L(top):
        ld8     u0 = [up0], 16          C M01
        ld8     u1 = [up1], 16          C M01
  (p9)  cmpeqor p6, p0 = -1, s0         C M I
  (p9)  add     s0 = 1, s0              C M I
  (p13) cmpeqor p10, p0 = 0, d0         C M I
  (p13) add     d0 = -1, d0             C M I
        ;;
        ld8     v0 = [vp0], 16          C M01
        ld8     v1 = [vp1], 16          C M01
  (p6)  cmpeqor p7, p0 = -1, s1         C M I
  (p6)  add     s1 = 1, s1              C M I
  (p10) cmpeqor p11, p0 = 0, d1         C M I
  (p10) add     d1 = -1, d1             C M I
        ;;
        st8     [sp] = s0, 8            C M23
        st8     [dp] = d0, 8            C M23
        add     s2 = u2, v2             C M I
        add     s3 = u3, v3             C M I
        sub     d2 = u2, v2             C M I
        sub     d3 = u3, v3             C M I
        ;;
        st8     [sp] = s1, 8            C M23
        st8     [dp] = d1, 8            C M23
        cmpltu  p8, p0 = s2, v2         C M I   carry from add2
        cmpltu  p9, p0 = s3, v3         C M I   carry from add3
        cmpltu  p12, p0 = u2, v2        C M I   borrow from sub2
        cmpltu  p13, p0 = u3, v3        C M I   borrow from sub3
        ;;
C Second half: the mirror image for limbs 2 and 3, forming the raw results of
C the limbs 0 and 1 that were loaded in the first half (or at L(gt4)).
L(mid):
        ld8     u2 = [up0], 16          C M01
        ld8     u3 = [up1], 16          C M01
  (p7)  cmpeqor p8, p0 = -1, s2         C M I
  (p7)  add     s2 = 1, s2              C M I
  (p11) cmpeqor p12, p0 = 0, d2         C M I
  (p11) add     d2 = -1, d2             C M I
        ;;
        ld8     v2 = [vp0], 16          C M01
        ld8     v3 = [vp1], 16          C M01
  (p8)  cmpeqor p9, p0 = -1, s3         C M I
  (p8)  add     s3 = 1, s3              C M I
  (p12) cmpeqor p13, p0 = 0, d3         C M I
  (p12) add     d3 = -1, d3             C M I
        ;;
        st8     [sp] = s2, 8            C M23
        st8     [dp] = d2, 8            C M23
        add     s0 = u0, v0             C M I
        add     s1 = u1, v1             C M I
        sub     d0 = u0, v0             C M I
        sub     d1 = u1, v1             C M I
        ;;
        st8     [sp] = s3, 8            C M23
        st8     [dp] = d3, 8            C M23
        cmpltu  p6, p0 = s0, v0         C M I   carry from add0
        cmpltu  p7, p0 = s1, v1         C M I   carry from add1
        cmpltu  p10, p0 = u0, v0        C M I   borrow from sub0
        cmpltu  p11, p0 = u1, v1        C M I   borrow from sub1
        ;;
        lfetch  [r10], 32               C M?    prefetch u, advance 32 bytes
        lfetch  [r11], 32               C M?    prefetch v, advance 32 bytes
        br.cloop.dptk   L(top)          C B
        ;;

C Wind-down: same work as the first half of the loop but with the loads
C replaced by nops, since all remaining limbs are already in registers.
L(end):
        nop     0
        nop     0
  (p9)  cmpeqor p6, p0 = -1, s0         C M I
  (p9)  add     s0 = 1, s0              C M I
  (p13) cmpeqor p10, p0 = 0, d0         C M I
  (p13) add     d0 = -1, d0             C M I
        ;;
        nop     0
        nop     0
  (p6)  cmpeqor p7, p0 = -1, s1         C M I
  (p6)  add     s1 = 1, s1              C M I
  (p10) cmpeqor p11, p0 = 0, d1         C M I
  (p10) add     d1 = -1, d1             C M I
        ;;
        st8     [sp] = s0, 8            C M23
        st8     [dp] = d0, 8            C M23
        add     s2 = u2, v2             C M I
        add     s3 = u3, v3             C M I
        sub     d2 = u2, v2             C M I
        sub     d3 = u3, v3             C M I
        ;;
        st8     [sp] = s1, 8            C M23
        st8     [dp] = d1, 8            C M23
        cmpltu  p8, p0 = s2, v2         C M I   carry from add2
        cmpltu  p9, p0 = s3, v3         C M I   carry from add3
        cmpltu  p12, p0 = u2, v2        C M I   borrow from sub2
        cmpltu  p13, p0 = u3, v3        C M I   borrow from sub3
        ;;
C Tail for the last two limbs: fix up and store limb 2, fix up limb 3, and
C start the return value at 0.
L(cj2):
  (p7)  cmpeqor p8, p0 = -1, s2         C M I
  (p7)  add     s2 = 1, s2              C M I
  (p11) cmpeqor p12, p0 = 0, d2         C M I
  (p11) add     d2 = -1, d2             C M I
        mov     r8 = 0                  C M I
        nop     0
        ;;
        st8     [sp] = s2, 8            C M23
        st8     [dp] = d2, 8            C M23
  (p8)  cmpeqor p9, p0 = -1, s3         C M I
  (p8)  add     s3 = 1, s3              C M I
  (p12) cmpeqor p13, p0 = 0, d3         C M I
  (p12) add     d3 = -1, d3             C M I
        ;;
C Final limb and return value: r8 = 2 * p9 (final carry) + p13 (final borrow).
C r8 is already 0 on every path that reaches this label.
L(cj1):
  (p9)  mov     r8 = 2                  C M I
        ;;
        mov.i   ar.lc = r2              C I0    restore caller ar.lc
  (p13) add     r8 = 1, r8              C M I
        st8     [sp] = s3               C M23
        st8     [dp] = d3               C M23
        br.ret.sptk.many b0             C B
EPILOGUE()
ASM_END()