dnl  Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc64/ultrasparc1234/add_n.asm

dnl  SPARC v9 mpn_add_n -- Add two limb vectors of the same length > 0 and
dnl  store sum in a third limb vector.

dnl  Copyright 2001-2003, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		   cycles/limb
C UltraSPARC 1&2:     4
C UltraSPARC 3:	      4.5

C Compute carry-out from the most significant bits of u,v, and r, where
C r=u+v+carry_in, using logic operations.

C This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  It has a 4 insn
C recurrence, and on the UltraSPARC 1 and 2 the IE units are 100% saturated.
C Therefore, it seems futile to try to optimize this any further...

C INPUT PARAMETERS
C   rp  destination limb vector
C   up  first source limb vector
C   vp  second source limb vector
C   n   number of limbs, must be > 0
C   cy  carry-in; read from the caller only by mpn_add_nc, while
C       mpn_add_n clears it before use
define(`rp',	`%i0')
define(`up',	`%i1')
define(`vp',	`%i2')
define(`n',	`%i3')

C Working registers: four limbs of each operand are held at a time.
define(`u0',	`%l0')
define(`u1',	`%l2')
define(`u2',	`%l4')
define(`u3',	`%l6')
define(`v0',	`%l1')
define(`v1',	`%l3')
define(`v2',	`%l5')
define(`v3',	`%l7')

C Running carry, always 0 or 1 once produced by srlx ...,63 below.
define(`cy',`%i4')

C Other registers used below:
C   %g1  u+v            (sum before the carry is added)
C   %g5  r = u+v+cy     (result limb, stored to rp)
C   %g2  u|v, then carry expression whose top bit is the carry-out
C   %g3  u&v
C
C Carry-out of each limb is the most significant bit of
C   ((u | v) & ~r) | (u & v)
C which is evaluated with or/andn/and/or and extracted with srlx by 63.

define(`fanop',`fitod %f0,%f2')		dnl  A quasi nop running in the FA pipe
define(`fmnop',`fmuld %f0,%f0,%f4')	dnl  A quasi nop running in the FM pipe

ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)

C mpn_add_nc -- same as mpn_add_n but cy is taken from the caller instead
C of being cleared.  Shares all code from L(com) onwards with mpn_add_n.
PROLOGUE(mpn_add_nc)
	save	%sp,-160,%sp

	fitod	%f0,%f0		C make sure f0 contains small, quiet number
	subcc	n,4,%g0		C compare n with 4, result discarded
	bl,pn	%xcc,.Loop0	C n < 4: one limb at a time
	nop			C delay slot; cy must be left untouched here
	b,a	L(com)		C annulled branch, no delay slot executed
EPILOGUE()

C mpn_add_n -- rp[] = up[] + vp[] over n limbs, carry-out returned in %i0.
PROLOGUE(mpn_add_n)
	save	%sp,-160,%sp

	fitod	%f0,%f0		C make sure f0 contains small, quiet number
	subcc	n,4,%g0		C compare n with 4, result discarded
	bl,pn	%xcc,.Loop0	C n < 4: one limb at a time
	mov	0,cy		C delay slot: no carry-in, runs on both paths

C Common path for n >= 4.  Load the first four limbs of each operand,
C step up and vp past them, and start the add of limb 0.
L(com):
	ldx	[up+0],u0
	ldx	[vp+0],v0
	add	up,32,up
	ldx	[up-24],u1	C up already advanced, this is old up+8
	ldx	[vp+8],v1
	add	vp,32,vp
	ldx	[up-16],u2
	ldx	[vp-16],v2
	ldx	[up-8],u3
	ldx	[vp-8],v3
	subcc	n,8,n		C n < 0 now means fewer than 8 limbs in total
	add	u0,v0,%g1	C main add
	add	%g1,cy,%g5	C carry add
	or	u0,v0,%g2
	bl,pn	%xcc,.Lend4567	C only the four loaded limbs form a full group
	fanop
	b,a	.Loop

	.align	16
C START MAIN LOOP
C On entry to each iteration:
C   u0..u3, v0..v3 hold four loaded limb pairs not yet stored,
C   %g5 = u0+v0+cy and %g2 = u0|v0 are already computed for limb 0.
C Each iteration finishes those four limbs, stores them, reloads the
C registers with the next four limb pairs and starts the next limb 0.
C The "C --" lines separate the four-instruction groups of the original
C schedule; fanop and fmnop are the filler instructions defined above.
.Loop:	andn	%g2,%g5,%g2	C limb 0: (u0|v0) & ~r0
	and	u0,v0,%g3
	ldx	[up+0],u0	C reload u0 for the next round
	fanop
C --
	or	%g3,%g2,%g2
	ldx	[vp+0],v0	C reload v0 for the next round
	add	up,32,up
	fanop
C --
	srlx	%g2,63,cy	C carry out of limb 0
	add	u1,v1,%g1
	stx	%g5,[rp+0]
	fanop
C --
	add	%g1,cy,%g5	C r1
	or	u1,v1,%g2
	fmnop
	fanop
C --
	andn	%g2,%g5,%g2	C limb 1
	and	u1,v1,%g3
	ldx	[up-24],u1
	fanop
C --
	or	%g3,%g2,%g2
	ldx	[vp+8],v1
	add	vp,32,vp
	fanop
C --
	srlx	%g2,63,cy	C carry out of limb 1
	add	u2,v2,%g1
	stx	%g5,[rp+8]
	fanop
C --
	add	%g1,cy,%g5	C r2
	or	u2,v2,%g2
	fmnop
	fanop
C --
	andn	%g2,%g5,%g2	C limb 2
	and	u2,v2,%g3
	ldx	[up-16],u2
	fanop
C --
	or	%g3,%g2,%g2
	ldx	[vp-16],v2
	add	rp,32,rp	C rp advanced here; later stores use rp-16, rp-8
	fanop
C --
	srlx	%g2,63,cy	C carry out of limb 2
	add	u3,v3,%g1
	stx	%g5,[rp-16]
	fanop
C --
	add	%g1,cy,%g5	C r3
	or	u3,v3,%g2
	fmnop
	fanop
C --
	andn	%g2,%g5,%g2	C limb 3
	and	u3,v3,%g3
	ldx	[up-8],u3
	fanop
C --
	or	%g3,%g2,%g2
	subcc	n,4,n		C four more limbs consumed; sets flags for bge
	ldx	[vp-8],v3
	fanop
C --
	srlx	%g2,63,cy	C carry out of limb 3
	add	u0,v0,%g1	C start limb 0 of the next group
	stx	%g5,[rp-8]
	fanop
C --
	add	%g1,cy,%g5
	or	u0,v0,%g2
	bge,pt	%xcc,.Loop	C flags still those of the subcc above
	fanop
C END MAIN LOOP

C Wind-down: finish the four limb pairs that are loaded but not stored.
C Same arithmetic as one loop iteration, without loads or filler nops.
.Lend4567:
	andn	%g2,%g5,%g2	C limb 0
	and	u0,v0,%g3
	or	%g3,%g2,%g2
	srlx	%g2,63,cy
	add	u1,v1,%g1
	stx	%g5,[rp+0]
	add	%g1,cy,%g5	C limb 1
	or	u1,v1,%g2
	andn	%g2,%g5,%g2
	and	u1,v1,%g3
	or	%g3,%g2,%g2
	srlx	%g2,63,cy
	add	u2,v2,%g1
	stx	%g5,[rp+8]
	add	%g1,cy,%g5	C limb 2
	or	u2,v2,%g2
	andn	%g2,%g5,%g2
	and	u2,v2,%g3
	or	%g3,%g2,%g2
	add	rp,32,rp
	srlx	%g2,63,cy
	add	u3,v3,%g1
	stx	%g5,[rp-16]
	add	%g1,cy,%g5	C limb 3
	or	u3,v3,%g2
	andn	%g2,%g5,%g2
	and	u3,v3,%g3
	or	%g3,%g2,%g2
	srlx	%g2,63,cy
	stx	%g5,[rp-8]

	addcc	n,4,n		C n = limbs still to do, 0 to 3
	bz,pn	%xcc,.Lret	C nothing left
	fanop

C Simple loop, one limb per iteration.  Used for n < 4 and for the
C 1 to 3 limbs left over after the unrolled code.
.Loop0:	ldx	[up],u0
	add	up,8,up
	ldx	[vp],v0
	add	vp,8,vp
	add	rp,8,rp
	subcc	n,1,n		C flags consumed by bnz below
	add	u0,v0,%g1
	or	u0,v0,%g2
	add	%g1,cy,%g5
	and	u0,v0,%g3
	andn	%g2,%g5,%g2
	stx	%g5,[rp-8]	C rp already advanced
	or	%g3,%g2,%g2
	bnz,pt	%xcc,.Loop0
	srlx	%g2,63,cy	C delay slot: carry out, runs on exit too

.Lret:	mov	cy,%i0		C return value = final carry
	ret
	restore			C delay slot of ret
EPILOGUE()