dnl  IA-64 mpn_cnd_add_n/mpn_cnd_sub_n.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C           cycles/limb
C Itanium:      ?
C Itanium 2:    1.5

C OVERVIEW
C   For each limb index i in 0 .. n-1 this routine stores
C       rp[i] = up[i] ADDSUB (vp[i] AND mask)
C   with the carry (add) or borrow (sub) of limb i-1 applied, where mask is
C   all ones when cnd is nonzero and zero when cnd is zero.  No branch
C   depends on cnd: the same loads, ALU operations and stores are issued for
C   either value, only the mask differs.
C
C   Returns in r8 the carry/borrow out of the most significant limb (0 or 1).
C
C   n must be at least 1: the loop count is computed as (n-1) >> 2 with an
C   unsigned shift, so n = 0 would yield an enormous count.
C
C CARRY SCHEME
C   Each result limb w is first formed without carry-in, and cmp.CND w, u
C   records whether that operation alone wrapped.  The carry-in is then
C   applied under the carry predicate of the previous limb: cmpeqor folds
C   the case w = LIM (the one value that wraps when INCR is added) into the
C   carry-out predicate, and the predicated add of INCR applies the carry.
C   Carry-out predicates: p6 for w0, p7 for w1, p8 for w2, p9 for w3; the
C   chain is p9 -> w0, p6 -> w1, p7 -> w2, p8 -> w3.
C   NOTE(review): cmpeqor is defined outside this file; it is presumably the
C   cmp.eq.or form -- confirm in the ia64 m4 definitions.

C INPUT PARAMETERS
define(`cnd', `r32')
define(`rp',  `r33')
define(`up',  `r34')
define(`vp',  `r35')
define(`n',   `r36')

C Per-operation parameters.  ADDSUB is the limb operation, CND the unsigned
C compare relation that detects a wrap of w against u, LIM/INCR the constants
C used when applying the carry-in (see CARRY SCHEME above).
ifdef(`OPERATION_cnd_add_n',`
  define(ADDSUB,	add)
  define(CND,		ltu)
  define(INCR,		1)
  define(LIM,		-1)
  define(func,    mpn_cnd_add_n)
')
ifdef(`OPERATION_cnd_sub_n',`
  define(ADDSUB,	sub)
  define(CND,		gtu)
  define(INCR,		-1)
  define(LIM,		0)
  define(func,    mpn_cnd_sub_n)
')

C Byte distance by which the two lfetch pointers run ahead of up and vp.
define(PFDIST, 160)

C Some useful aliases for registers we use
C   u0-u3  limbs loaded from up
C   v0-v3  limbs loaded from vp
C   x0-x3  v limbs after masking with cnd
C   w0-w3  result limbs
C   up1/up2, vp1/vp2, rp1/rp2  pairs of pointers 8 bytes apart, each stepped
C                              by 16 so that two limbs are handled at a time
C   upadv, vpadv               lfetch addresses
define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
define(`x0',`r20') define(`x1',`r21') define(`x2',`r22') define(`x3',`r23')
define(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')
define(`w0',`r28') define(`w1',`r29') define(`w2',`r30') define(`w3',`r31')
C upadv is kept in r18, a scratch register not used for anything else in this
C file.  It must not live in r1: r1 is the global pointer (gp), which the
C Itanium runtime conventions require to be valid when a procedure returns,
C and this routine never saves or restores it.
define(`up1',`up') define(`up2',`r8') define(`upadv',`r18')
define(`vp1',`vp') define(`vp2',`r9') define(`vpadv',`r11')
define(`rp1',`rp') define(`rp2',`r10')

MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)

ASM_START()
PROLOGUE(func)
	.prologue
	.save	ar.lc, r2
	.body
C With the 32-bit ABI, widen the three pointers (addp4) and the count (zxt4).
ifdef(`HAVE_ABI_32',`
	addp4	rp = 0, rp		C			M I
	addp4	up = 0, up		C			M I
	nop.i	0
	addp4	vp = 0, vp		C			M I
	nop.m	0
	zxt4	n = n			C			I
	;;
')
C Setup: r3 = n mod 4 selects the entry path, n becomes the loop count
C (n-1) >> 2, r2 keeps the incoming ar.lc, the second pointer of each pair is
C placed 8 bytes after the first, and cnd is normalised to 0 or all ones.
 {.mmi;	and	r3 = 3, n		C			M I
	add	n = -1, n		C			M I
	mov	r2 = ar.lc		C			I0
}{.mmi;	cmp.ne	p6, p7 = 0, cnd		C			M I
	add	vp2 = 8, vp		C			M I
	add	up2 = 8, up		C			M I
	;;
}{.mmi;	add	upadv = PFDIST, up	C			M I
	add	vpadv = PFDIST, vp	C			M I
	shr.u	n = n, 2		C			I0
	.pred.rel "mutex", p6, p7
}{.mmi;	add	rp2 = 8, rp		C			M I
   (p6)	mov	cnd = -1		C			M I
   (p7)	mov	cnd = 0			C			M I
	;;
}	cmp.eq	p9, p0 = 1, r3		C			M I
	cmp.eq	p7, p0 = 2, r3		C			M I
	cmp.eq	p8, p0 = 3, r3		C			M I
   (p9)	br	L(b1)			C			B
   (p7)	br	L(b2)			C			B
   (p8)	br	L(b3)			C			B
	;;
C n mod 4 = 0 (also reached from b1): form w2,w3, preload v0,v1 and enter the
C loop at lo0.
L(b0):
 {.mmi;	ld8	v2 = [vp1], 16		C			M01
	ld8	v3 = [vp2], 16		C			M01
	mov	ar.lc = n		C			I0
	;;
}	ld8	u2 = [up1], 16		C			M01
	ld8	u3 = [up2], 16		C			M01
	and	x2 = v2, cnd		C			M I
	and	x3 = v3, cnd		C			M I
	;;
	ADDSUB	w2 = u2, x2		C			M I
	ADDSUB	w3 = u3, x3		C			M I
	;;
	ld8	v0 = [vp1], 16		C			M01
	ld8	v1 = [vp2], 16		C			M01
	cmp.CND	p8, p0 = w2, u2		C			M I
	cmp.CND	p9, p0 = w3, u3		C			M I
	br	L(lo0)

C n mod 4 = 1: process and store a single limb, leaving its carry in p7.  The
C second pointer of each pair is bumped by 8 to stay 8 bytes ahead.  If the
C loop count is nonzero, continue at b0 with the count reduced by one;
C otherwise return through e1, which does not touch ar.lc (it was never
C written on this path).
L(b1):	ld8	v1 = [vp1], 8		C			M01
	add	vp2 = 8, vp2		C			M I
	add	rp2 = 8, rp2		C			M I
	;;
	ld8	u1 = [up1], 8		C			M01
	add	up2 = 8, up2		C			M I
	and	x1 = v1, cnd		C			M I
	;;
	ADDSUB	w1 = u1, x1		C			M I
	cmp.ne	p10, p0 = 0, n
	add	n = -1, n
	;;
	cmp.CND	p7, p0 = w1, u1		C			M I
	st8	[rp1] = w1, 8		C			M23
  (p10)	br	L(b0)
	;;
	mov	r8 = 0			C			M I
	br	L(e1)

C n mod 4 = 3: process and store a single limb, leaving its carry in p9, then
C fall into the two-limb entry below.
L(b3):	ld8	v3 = [vp1], 8		C			M01
	add	vp2 = 8, vp2		C			M I
	add	rp2 = 8, rp2		C			M I
	;;
	ld8	u3 = [up1], 8		C			M01
	add	up2 = 8, up2		C			M I
	and	x3 = v3, cnd		C			M I
	;;
	ADDSUB	w3 = u3, x3		C			M I
	;;
	cmp.CND	p9, p0 = w3, u3		C			M I
	st8	[rp1] = w3, 8		C			M23
	C fall through

C n mod 4 = 2 (also reached from b3): form w0,w1.  If the loop count is
C nonzero, preload v2,v3 and enter the loop at lo2; otherwise finish at e2.
L(b2):
 {.mmi;	ld8	v0 = [vp1], 16		C			M01
	ld8	v1 = [vp2], 16		C			M01
	mov	ar.lc = n		C			I0
	;;
}	ld8	u0 = [up1], 16		C			M01
	ld8	u1 = [up2], 16		C			M01
	and	x0 = v0, cnd		C			M I
	and	x1 = v1, cnd		C			M I
	;;
	ADDSUB	w0 = u0, x0		C			M I
	ADDSUB	w1 = u1, x1		C			M I
	br.cloop.dptk	L(gt2)		C			B
	;;
	cmp.CND	p6, p0 = w0, u0		C			M I
	br		L(e2)		C			B
L(gt2):
	ld8	v2 = [vp1], 16		C			M01
	ld8	v3 = [vp2], 16		C			M01
	cmp.CND	p6, p0 = w0, u0		C			M I
	cmp.CND	p7, p0 = w1, u1		C			M I
	br		L(lo2)		C			B


	C *** MAIN LOOP START ***
C Four limbs per iteration, software pipelined: while one limb pair has its
C carry-in applied and is stored, the other pair is being loaded, masked and
C combined.  Each half of the loop issues one lfetch and advances that
C prefetch address by 32 bytes.
C	ALIGN(32)
L(top):
 {.mmi;	ld8	v2 = [vp1], 16		C			M01
	ld8	v3 = [vp2], 16		C			M01
	cmp.CND	p6, p0 = w0, u0		C			M I
}{.mmi;	st8	[rp1] = w2, 16		C			M23
	st8	[rp2] = w3, 16		C			M23
	cmp.CND	p7, p0 = w1, u1		C			M I
	;;
}
L(lo2):
 {.mmi;	ld8	u2 = [up1], 16		C			M01
	ld8	u3 = [up2], 16		C			M01
   (p9)	cmpeqor	p6, p0 = LIM, w0	C			M I
}{.mmi;	and	x2 = v2, cnd		C			M I
	and	x3 = v3, cnd		C			M I
   (p9)	add	w0 = INCR, w0		C			M I
	;;
}{.mmi;	ADDSUB	w2 = u2, x2		C			M I
   (p6)	cmpeqor	p7, p0 = LIM, w1	C			M I
   (p6)	add	w1 = INCR, w1		C			M I
}{.mmi;	ADDSUB	w3 = u3, x3		C			M I
	lfetch	[upadv], 32
	nop	0
	;;
}{.mmi;	ld8	v0 = [vp1], 16		C			M01
	ld8	v1 = [vp2], 16		C			M01
	cmp.CND	p8, p0 = w2, u2		C			M I
}{.mmi;	st8	[rp1] = w0, 16		C			M23
	st8	[rp2] = w1, 16		C			M23
	cmp.CND	p9, p0 = w3, u3		C			M I
	;;
}
L(lo0):
 {.mmi;	ld8	u0 = [up1], 16		C			M01
	ld8	u1 = [up2], 16		C			M01
   (p7)	cmpeqor	p8, p0 = LIM, w2	C			M I
}{.mmi;	and	x0 = v0, cnd		C			M I
	and	x1 = v1, cnd		C			M I
   (p7)	add	w2 = INCR, w2		C			M I
	;;
}{.mmi;	ADDSUB	w0 = u0, x0		C			M I
   (p8)	cmpeqor	p9, p0 = LIM, w3	C			M I
   (p8)	add	w3 = INCR, w3		C			M I
}{.mmb;	ADDSUB	w1 = u1, x1		C			M I
	lfetch	[vpadv], 32
	br.cloop.dptk	L(top)		C			B
	;;
}
	C *** MAIN LOOP END ***


C Wind-down: store the finished w2,w3, apply the pending carries to w0,w1 and
C store them, then put the saved value back into ar.lc.
L(end):
 {.mmi;	st8	[rp1] = w2, 16		C			M23
	st8	[rp2] = w3, 16		C			M23
	cmp.CND	p6, p0 = w0, u0		C			M I
	;;
}
L(e2):
 {.mmi;	cmp.CND	p7, p0 = w1, u1		C			M I
   (p9)	cmpeqor	p6, p0 = LIM, w0	C			M I
   (p9)	add	w0 = INCR, w0		C			M I
	;;
}{.mmi;	mov	r8 = 0			C			M I
   (p6)	cmpeqor	p7, p0 = LIM, w1	C			M I
   (p6)	add	w1 = INCR, w1		C			M I
	;;
}{.mmi;	st8	[rp1] = w0, 16		C			M23
	st8	[rp2] = w1, 16		C			M23
	mov	ar.lc = r2		C			I0
}
C Common exit: p7 is the carry/borrow out of the last limb; r8 already holds
C 0 and is set to 1 when p7 is true.
L(e1):
 {.mmb;	nop	0
   (p7)	mov	r8 = 1			C			M I
	br.ret.sptk.many b0		C			B
}
EPILOGUE()
ASM_END()