dnl  SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
dnl  store difference in a third limb vector.

dnl  Copyright 2001 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


include(`../config.m4')

C INPUT PARAMETERS
define(rp,%o0)
define(s1p,%o1)
define(s2p,%o2)
define(n,%o3)
define(cy,%g1)

C This code uses 64-bit operations on `o' and `g' registers.  It doesn't
C require that `o' registers' upper 32 bits are preserved by the operating
C system, but if they are not, they must be zeroed.  That is indeed what
C happens at least on Solaris 2.5 and 2.6.

C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
C about 10 cycles/limb from the Ecache.

C Interface (register roles as used by the code below):
C   In:    rp  = %o0  destination limb vector
C          s1p = %o1  minuend limb vector
C          s2p = %o2  subtrahend limb vector
C          n   = %o3  limb count, must be > 0
C   Out:   %o0 = borrow out of the most significant limb (0 or 1)
C   Clobb: %o1-%o5, %g1-%g4, %f2 (double destination of fitod), %icc
C
C Limbs are 32 bits wide (lduw/stw, 4-byte stride).  Each limb is loaded
C zero-extended into a 64-bit register, so a 64-bit `sub' of two limbs (and
C of the incoming borrow, 0 or 1) is negative exactly when the 32-bit
C subtraction borrows.  `srlx x,63' therefore extracts the borrow, and `stw'
C stores the low 32 bits as the result limb.
C
C The main loop handles two limbs per iteration and keeps two limb pairs in
C flight: %o4/%o5 hold pair k, %g2/%g3 hold pair k+1.  After the initial
C `addcc n,-2,n', n holds (limbs not yet stored) - 2.
C
C `fitod %f0,%f2' takes no part in the subtraction and its result is never
C read here; presumably it is padding that shapes the UltraSPARC issue groups
C delimited by the `C ---' lines -- TODO confirm against the pipeline docs.

ASM_START()
PROLOGUE(mpn_sub_n)
	lduw	[s1p+0],%o4
	lduw	[s2p+0],%o5
	addcc	n,-2,n
	bl,pn	%icc,L(end1)		C n == 1: a single limb only
	mov	0,cy			C delay slot: no memory access, so the
					C one-limb case never reads past limb 0
	lduw	[s1p+4],%g2		C reached only when n >= 2
	be,pn	%icc,L(end2)		C n == 2: no loop needed
	lduw	[s2p+4],%g3		C delay slot: wanted on both paths

	.align	16
L(loop):
	sub	%o4,%o5,%g4
	add	rp,8,rp
	lduw	[s1p+8],%o4		C prefetch pair k+2
	fitod	%f0,%f2
C ---
	sub	%g4,cy,%g4
	addcc	n,-1,n
	lduw	[s2p+8],%o5
	fitod	%f0,%f2
C ---
	srlx	%g4,63,cy
	add	s2p,8,s2p
	stw	%g4,[rp-8]
	be,pn	%icc,L(exito)+4		C odd tail; +4 skips the first insn of
					C L(exito), which the delay slot below
					C has already executed
C ---
	sub	%g2,%g3,%g4		C delay slot of the branch above
	addcc	n,-1,n
	lduw	[s1p+12],%g2		C prefetch pair k+3
	fitod	%f0,%f2
C ---
	sub	%g4,cy,%g4
	add	s1p,8,s1p
	lduw	[s2p+4],%g3		C s2p was already advanced by 8
	fitod	%f0,%f2
C ---
	srlx	%g4,63,cy
	bne,pt	%icc,L(loop)
	stw	%g4,[rp-4]		C delay slot
C ---
C Even tail: pairs in %o4/%o5 and %g2/%g3 remain, in that order.
L(exite):
	sub	%o4,%o5,%g4
	sub	%g4,cy,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp+0]
	sub	%g2,%g3,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+4]
	retl
	srlx	%g4,63,%o0		C delay slot: return final borrow

C Odd tail: pair %g2/%g3 comes first, then the freshly loaded %o4/%o5.
C Entered at L(exito)+4 from the loop.
L(exito):
	sub	%g2,%g3,%g4
	sub	%g4,cy,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp-4]
	sub	%o4,%o5,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+0]
	retl
	srlx	%g4,63,%o0		C delay slot: return final borrow

C n == 1.
L(end1):
	sub	%o4,%o5,%g4
	stw	%g4,[rp+0]
	retl
	srlx	%g4,63,%o0		C delay slot: return final borrow

C n == 2.
L(end2):
	sub	%o4,%o5,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp+0]
	sub	%g2,%g3,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+4]
	retl
	srlx	%g4,63,%o0		C delay slot: return final borrow
EPILOGUE(mpn_sub_n)