dnl  SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
dnl  store difference in a third limb vector.

dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


include(`../config.m4')

C INPUT PARAMETERS
C   res_ptr  %o0   destination limb vector
C   s1_ptr   %o1   minuend limb vector
C   s2_ptr   %o2   subtrahend limb vector
C   n        %o3   number of limbs (callers pass n > 0)
C Returns the borrow-out of the most significant limb in %o0.
define(res_ptr,%o0)
define(s1_ptr,%o1)
define(s2_ptr,%o2)
define(n,%o3)

C Three variants are selected on the mutual 8-byte alignment of the three
C pointers, so that the inner loops can use 64-bit ldd/std where possible:
C   V1a: s2_ptr and res_ptr share alignment (s1_ptr differs)
C   V1b: s1_ptr and res_ptr share alignment (s2_ptr differs)
C   V2:  s1_ptr and s2_ptr share alignment (res_ptr differs)
C
C Carry (borrow) handling idiom used throughout: the SPARC carry bit does not
C survive the addcc/subcc used for loop control, so it is parked in %o4 with
C   addx  %g0,%g0,%o4      C %o4 = C
C and regenerated (in a delay slot) with
C   subcc %g0,%o4,%g0      C 0 - %o4 sets C iff %o4 != 0

ASM_START()
PROLOGUE(mpn_sub_n)
	xor	s2_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L(1)			C branch if alignment differs
	nop
C **  V1a  **
	andcc	res_ptr,4,%g0		C res_ptr unaligned? Side effect: cy=0
	be	L(v1)			C if no, branch
	nop
C Subtract least significant limb separately to align res_ptr and s2_ptr
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	n,-1,n
	subcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr
L(v1):	addx	%g0,%g0,%o4		C save cy in register
	cmp	n,2			C if n < 2 ...
	bl	L(end2)			C ... branch to tail code
	subcc	%g0,%o4,%g0		C restore cy

	ld	[s1_ptr+0],%g4
	addcc	n,-10,n
	ld	[s1_ptr+4],%g1
	ldd	[s2_ptr+0],%g2
	blt	L(fin1)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 8 limbs until less than 8 limbs remain
L(loop1):
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+16],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+20],%g1
	ldd	[s2_ptr+16],%g2
	std	%o4,[res_ptr+8]
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+24],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+28],%g1
	ldd	[s2_ptr+24],%g2
	std	%o4,[res_ptr+16]
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+32],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+36],%g1
	ldd	[s2_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop1)
	subcc	%g0,%o4,%g0		C restore cy

L(fin1):
	addcc	n,8-2,n			C undo bias; test for >= 2 leftover limbs
	blt	L(end1)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 2 limbs until less than 2 limbs remain
L(loope1):
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope1)
	subcc	%g0,%o4,%g0		C restore cy
L(end1):
	subxcc	%g4,%g2,%o4		C fold in the limb pair already loaded
	subxcc	%g1,%g3,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register

	andcc	n,1,%g0			C one straggler limb left?
	be	L(ret1)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract last limb
	ld	[s1_ptr+8],%g4
	ld	[s2_ptr+8],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[res_ptr+8]

L(ret1):
	retl
	addx	%g0,%g0,%o0		C return carry-out from most sign. limb

L(1):	xor	s1_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L(2)
	nop
C **  V1b  **
C Mirror of V1a with the roles of s1_ptr and s2_ptr swapped, so that s2 limbs
C are fetched with 32-bit ld and s1 limbs with 64-bit ldd.  Note the operand
C order of every subxcc keeps s1 - s2.
	andcc	res_ptr,4,%g0		C res_ptr unaligned? Side effect: cy=0
	be	L(v1b)			C if no, branch
	nop
C Subtract least significant limb separately to align res_ptr and s1_ptr
	ld	[s2_ptr],%g4
	add	s2_ptr,4,s2_ptr
	ld	[s1_ptr],%g2
	add	s1_ptr,4,s1_ptr
	add	n,-1,n
	subcc	%g2,%g4,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr
L(v1b):	addx	%g0,%g0,%o4		C save cy in register
	cmp	n,2			C if n < 2 ...
	bl	L(end2)			C ... branch to tail code
	subcc	%g0,%o4,%g0		C restore cy

	ld	[s2_ptr+0],%g4
	addcc	n,-10,n
	ld	[s2_ptr+4],%g1
	ldd	[s1_ptr+0],%g2
	blt	L(fin1b)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 8 limbs until less than 8 limbs remain
L(loop1b):
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+12],%g1
	ldd	[s1_ptr+8],%g2
	std	%o4,[res_ptr+0]
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+16],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+20],%g1
	ldd	[s1_ptr+16],%g2
	std	%o4,[res_ptr+8]
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+24],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+28],%g1
	ldd	[s1_ptr+24],%g2
	std	%o4,[res_ptr+16]
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+32],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+36],%g1
	ldd	[s1_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop1b)
	subcc	%g0,%o4,%g0		C restore cy

L(fin1b):
	addcc	n,8-2,n			C undo bias; test for >= 2 leftover limbs
	blt	L(end1b)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 2 limbs until less than 2 limbs remain
L(loope1b):
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+12],%g1
	ldd	[s1_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope1b)
	subcc	%g0,%o4,%g0		C restore cy
L(end1b):
	subxcc	%g2,%g4,%o4		C fold in the limb pair already loaded
	subxcc	%g3,%g1,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register

	andcc	n,1,%g0			C one straggler limb left?
	be	L(ret1b)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract last limb
	ld	[s2_ptr+8],%g4
	ld	[s1_ptr+8],%g2
	subxcc	%g2,%g4,%o4
	st	%o4,[res_ptr+8]

L(ret1b):
	retl
	addx	%g0,%g0,%o0		C return carry-out from most sign. limb

C **  V2  **
C If we come here, the alignment of s1_ptr and res_ptr as well as the
C alignment of s2_ptr and res_ptr differ.  Since there are only two ways
C things can be aligned (that we care about) we now know that the alignment
C of s1_ptr and s2_ptr are the same.

L(2):	cmp	n,1
	be	L(jone)			C single limb: skip alignment games
	nop
	andcc	s1_ptr,4,%g0		C s1_ptr unaligned? Side effect: cy=0
	be	L(v2)			C if no, branch
	nop
C Subtract least significant limb separately to align s1_ptr and s2_ptr
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	n,-1,n
	subcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

L(v2):	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	blt	L(fin2)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 8 limbs until less than 8 limbs remain
L(loop2):
	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	ldd	[s1_ptr+8],%g2
	ldd	[s2_ptr+8],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+8]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+12]
	ldd	[s1_ptr+16],%g2
	ldd	[s2_ptr+16],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+16]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+20]
	ldd	[s1_ptr+24],%g2
	ldd	[s2_ptr+24],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+24]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+28]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop2)
	subcc	%g0,%o4,%g0		C restore cy

L(fin2):
	addcc	n,8-2,n			C undo bias; test for >= 2 leftover limbs
	blt	L(end2)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 2 limbs until less than 2 limbs remain
L(loope2):
	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope2)
	subcc	%g0,%o4,%g0		C restore cy
L(end2):
	andcc	n,1,%g0			C one straggler limb left?
	be	L(ret2)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract last limb
L(jone):
	ld	[s1_ptr],%g4
	ld	[s2_ptr],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[res_ptr]

L(ret2):
	retl
	addx	%g0,%g0,%o0		C return carry-out from most sign. limb
EPILOGUE(mpn_sub_n)