dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc32/add_n.asm

dnl  SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
dnl  sum in a third limb vector.

dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


include(`../config.m4')

C INPUT PARAMETERS
define(res_ptr,%o0)
define(s1_ptr,%o1)
define(s2_ptr,%o2)
define(n,%o3)

C REGISTER USAGE
C   %o0  res_ptr  in:  address where the sum limbs are stored
C                 out: carry out of the most significant limb (0 or 1)
C   %o1  s1_ptr   in:  address of the first source vector
C   %o2  s2_ptr   in:  address of the second source vector
C   %o3  n        in:  number of limbs, > 0
C   %g1-%g4, %o4, %o5 and the integer condition codes are scratch.  All four
C   argument registers are modified.  The routine returns with retl and no
C   save/restore appears in this file.
C
C   A limb is one 32-bit word here: single limbs move with ld/st and every
C   pointer advances by 4 per limb.  ldd/std move two limbs at once through
C   an even/odd register pair (%g2,%g3 or %o4,%o5).
C   NOTE(review): the code only inspects bit 2 of each pointer before using
C   ldd/std, so it presumably relies on the caller passing word-aligned
C   pointers -- confirm against the mpn calling rules.
C
C CODE PATHS (chosen from bit 2 of the three pointers)
C   V1a  s2_ptr and res_ptr agree:  ld from s1_ptr, ldd from s2_ptr, std to
C        res_ptr
C   V1b  s1_ptr and res_ptr agree:  swap s1_ptr and s2_ptr, then run V1a
C   V2   only s1_ptr and s2_ptr agree:  ldd from both sources, st to res_ptr
C
C CARRY HANDLING
C   The carry between limbs lives in the carry flag (cy) and is chained with
C   addxcc.  The loop counting and tail tests also write the flags, so cy is
C   parked in %o4 with "addx %g0,%g0,%o4" (%o4 = 0 + 0 + cy) and put back with
C   "subcc %g0,%o4,%g0" (0 - %o4 borrows exactly when %o4 is 1).  The
C   restoring subcc is placed in the delay slot of the branch, i.e. after the
C   branch has already tested the flags it needs.

ASM_START()
PROLOGUE(mpn_add_n)
	xor	s2_ptr,res_ptr,%g1	C bit 2 of %g1 set if s2_ptr and res_ptr differ there
	andcc	%g1,4,%g0
	bne	L(1)			C branch if alignment differs
	nop
C **  V1a  **
L(0):	andcc	res_ptr,4,%g0		C res_ptr unaligned? Side effect: cy=0
	be	L(v1)			C if no, branch
	nop
C Add least significant limb separately to align res_ptr and s2_ptr
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	n,-1,n
	addcc	%g4,%g2,%o4		C first limb: plain add, starts the carry chain
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr
L(v1):	addx	%g0,%g0,%o4		C save cy in register
	cmp	n,2			C if n < 2 ...
	bl	L(end2)			C ... branch to tail code
	subcc	%g0,%o4,%g0		C restore cy

C At least 2 limbs remain.  Preload the first pair: %g4,%g1 from s1_ptr with
C two word loads, %g2,%g3 from s2_ptr with one ldd.  n is lowered by 10, which
C is the 8 limbs added by one pass of L(loop1) plus the pair that the pass
C preloads (offsets +32 and +36) for the code following it.
	ld	[s1_ptr+0],%g4
	addcc	n,-10,n
	ld	[s1_ptr+4],%g1
	ldd	[s2_ptr+0],%g2
	blt	L(fin1)
	subcc	%g0,%o4,%g0		C restore cy
C Add blocks of 8 limbs until less than 8 limbs remain
C Each group of six instructions adds the preloaded pair into %o4,%o5, loads
C the next pair, and stores the finished pair with std.
L(loop1):
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+16],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+20],%g1
	ldd	[s2_ptr+16],%g2
	std	%o4,[res_ptr+8]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+24],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+28],%g1
	ldd	[s2_ptr+24],%g2
	std	%o4,[res_ptr+16]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+32],%g4		C pair for the next pass or for the tail
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+36],%g1
	ldd	[s2_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop1)
	subcc	%g0,%o4,%g0		C restore cy

C Rebias n: add back the 8 of the failed loop test, keep 2 subtracted.  From
C here n >= 0 means that, besides the pair already held in registers, at
C least one more full pair is left to load.
L(fin1):
	addcc	n,8-2,n
	blt	L(end1)
	subcc	%g0,%o4,%g0		C restore cy
C Add blocks of 2 limbs until less than 2 limbs remain
L(loope1):
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope1)
	subcc	%g0,%o4,%g0		C restore cy
C Add and store the pair that is still held in %g4,%g1 and %g2,%g3
L(end1):
	addxcc	%g4,%g2,%o4
	addxcc	%g1,%g3,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register

	andcc	n,1,%g0			C n is -2 or -1 here; odd means one limb is left
	be	L(ret1)
	subcc	%g0,%o4,%g0		C restore cy
C Add last limb
C The pointers were not advanced past the pair handled at L(end1), hence the
C offset of 8.
	ld	[s1_ptr+8],%g4
	ld	[s2_ptr+8],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr+8]

L(ret1):
	retl
	addx	%g0,%g0,%o0		C return carry-out from most sign. limb

L(1):	xor	s1_ptr,res_ptr,%g1	C s2_ptr/res_ptr differ; try s1_ptr/res_ptr
	andcc	%g1,4,%g0
	bne	L(2)
	nop
C **  V1b  **
C Exchange the two source pointers through %g1 and reuse the V1a code.  The
C last move of the exchange sits in the delay slot of the branch.
	mov	s2_ptr,%g1
	mov	s1_ptr,s2_ptr
	b	L(0)
	mov	%g1,s1_ptr

C **  V2  **
C If we come here, the alignment of s1_ptr and res_ptr as well as the
C alignment of s2_ptr and res_ptr differ.  Since there are only two ways
C things can be aligned (that we care about) we now know that the alignment
C of s1_ptr and s2_ptr are the same.

C n equal to 1 goes straight to the single-limb code.  When that branch is
C taken the compare found equality, so it produced no borrow and cy is 0 for
C the addxcc at L(jone).
L(2):	cmp	n,1
	be	L(jone)
	nop
	andcc	s1_ptr,4,%g0		C s1_ptr unaligned? Side effect: cy=0
	be	L(v2)			C if no, branch
	nop
C Add least significant limb separately to align s1_ptr and s2_ptr
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	n,-1,n
	addcc	%g4,%g2,%o4		C first limb: plain add, starts the carry chain
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

L(v2):	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	blt	L(fin2)
	subcc	%g0,%o4,%g0		C restore cy
C Add blocks of 8 limbs until less than 8 limbs remain
C Both sources are read a pair at a time with ldd (s1 into %g2,%g3 and s2
C into %o4,%o5); the sums are written back one limb at a time with st.
L(loop2):
	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	ldd	[s1_ptr+8],%g2
	ldd	[s2_ptr+8],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+8]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+12]
	ldd	[s1_ptr+16],%g2
	ldd	[s2_ptr+16],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+16]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+20]
	ldd	[s1_ptr+24],%g2
	ldd	[s2_ptr+24],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+24]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+28]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop2)
	subcc	%g0,%o4,%g0		C restore cy

C Rebias n: add back the 8 of the failed loop test, keep 2 subtracted, so
C that n >= 0 means at least one full pair of limbs is left.
L(fin2):
	addcc	n,8-2,n
	blt	L(end2)
	subcc	%g0,%o4,%g0		C restore cy
C Add blocks of 2 limbs until less than 2 limbs remain
L(loope2):
	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope2)
	subcc	%g0,%o4,%g0		C restore cy
C Shared tail.  Reached from the V2 code above and also from L(v1) when fewer
C than 2 limbs remain on the V1 path.  In both cases cy is saved in %o4 and
C n is odd exactly when one limb is still to be added.
L(end2):
	andcc	n,1,%g0
	be	L(ret2)
	subcc	%g0,%o4,%g0		C restore cy
C Add last limb
L(jone):
	ld	[s1_ptr],%g4
	ld	[s2_ptr],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr]

L(ret2):
	retl
	addx	%g0,%g0,%o0		C return carry-out from most sign. limb
EPILOGUE(mpn_add_n)