dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/pa32/hppa1_1/pa7100/submul_1.asm

dnl  HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
dnl  subtract the result from a second limb vector.

dnl  Copyright 1995, 2000-2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C mpn_submul_1: multiply the limb vector at s1_ptr by s2_limb and subtract
C the products from the limb vector at res_ptr, which is updated in place.
C size_param counts the limbs.  The carry-out limb is accumulated in cylimb
C and is what remains in that register on return (presumably the function
C result under the PA-RISC calling convention -- confirm).
C
C Structure:
C   entry            open a 128-byte frame, move s2_limb into %fr31R via memory
C   single-limb step run or skipped according to bit 29 of s1_ptr (presumably
C                    so that s1_ptr is 8-byte aligned for the fldds loads that
C                    follow -- confirm)
C   L(0)/loop/end    software-pipelined body, four limbs per pass
C   few_limbs/loop2  remaining limbs, one per pass
C   ret              fold the pending carry bit into cylimb, release the frame
C
C Each product is formed by xmpyu, stored as a doubleword with fstds and read
C back as two words: high half at the lower address, low half 4 bytes above.
C
C The carry bit links consecutive limbs.  The product side is an addc chain
C and the subtract side is a sub/subb chain; the bit left by the subtract
C side has the opposite sense to what the next addc needs, hence the explicit
C inversions marked below.
C
C Frame, as offsets from %r30 after the 128-byte adjustment:
C   -128 .. -97   four doubleword product slots, addressed as -16..8(%r31)
C    -96 .. -77   save area for %r3-%r7
C    -16 ..  -9   s2_limb transfer word; reused by loop2 as its product slot

C INPUT PARAMETERS
define(`res_ptr',`%r26')			C vector subtracted from, updated in place
define(`s1_ptr',`%r25')				C vector that is multiplied
define(`size_param',`%r24')			C limb count, used as the loop counter
define(`s2_limb',`%r23')			C multiplier limb

C WORKING REGISTERS
C cylimb      carry limb handed from one product to the next
C s0-s3       words loaded from res_ptr
C lo0-lo3     low product words, after the addc chain the values to subtract
C hi0-hi3     high product words
C %r3-%r7 (s2, s3, lo1, lo2, lo3) are saved in the frame before use and
C restored afterwards.
define(`cylimb',`%r28')
define(`s0',`%r19')
define(`s1',`%r20')
define(`s2',`%r3')
define(`s3',`%r4')
define(`lo0',`%r21')
define(`lo1',`%r5')
define(`lo2',`%r6')
define(`lo3',`%r7')
define(`hi0',`%r22')
define(`hi1',`%r23')				C safe to reuse: s2_limb is copied to %fr31R at entry
define(`hi2',`%r29')
define(`hi3',`%r1')

ASM_START()
PROLOGUE(mpn_submul_1)
C	.callinfo	frame=128,no_calls

	ldo	128(%r30),%r30			C open the 128-byte frame
	stws	s2_limb,-16(%r30)		C multiplier goes to memory ...
	add	%r0,%r0,cylimb			C clear cy and cylimb
	addib,<	-4,size_param,L(few_limbs)	C size_param - 4 negative: one-limb loop only
	fldws	-16(%r30),%fr31R		C ... and from there into %fr31R (branch delay slot)

	ldo	-112(%r30),%r31			C %r31 = base address of the product slots
	stw	%r3,-96(%r30)			C save %r3-%r7, restored after L(end)
	stw	%r4,-92(%r30)
	stw	%r5,-88(%r30)
	stw	%r6,-84(%r30)
	stw	%r7,-80(%r30)

	bb,>=,n	s1_ptr,29,L(0)			C bit 29 of s1_ptr selects whether the step below runs

C Single-limb step ahead of the pipelined code.  No product precedes this
C limb, so its high word is loaded straight into cylimb.
	fldws,ma	4(s1_ptr),%fr4
	ldws	0(res_ptr),s0
	xmpyu	%fr4,%fr31R,%fr5
	fstds	%fr5,-16(%r31)
	ldws	-16(%r31),cylimb		C high word becomes the carry limb
	ldws	-12(%r31),lo0			C low word is subtracted now
	sub	s0,lo0,s0
	add	s0,lo0,%r0			C invert cy: adding lo0 back carries out exactly when the sub wrapped
	addib,<	-1,size_param,L(few_limbs)	C fewer than 4 limbs left: one-limb loop
	stws,ma	s0,4(res_ptr)			C store the result word and advance (branch delay slot)

C start software pipeline ----------------------------------------------------
C Form the first four products and run the addc chain on them.  Nothing is
C subtracted yet; that happens in L(loop) or L(end).
LDEF(0)
	fldds,ma	8(s1_ptr),%fr4
	fldds,ma	8(s1_ptr),%fr8

	xmpyu	%fr4L,%fr31R,%fr5
	xmpyu	%fr4R,%fr31R,%fr6
	xmpyu	%fr8L,%fr31R,%fr9
	xmpyu	%fr8R,%fr31R,%fr10

	fstds	%fr5,-16(%r31)
	fstds	%fr6,-8(%r31)
	fstds	%fr9,0(%r31)
	fstds	%fr10,8(%r31)

	ldws	-16(%r31),hi0
	ldws	-12(%r31),lo0
	ldws	-8(%r31),hi1
	ldws	-4(%r31),lo1
	ldws	0(%r31),hi2
	ldws	4(%r31),lo2
	ldws	8(%r31),hi3
	ldws	12(%r31),lo3

	addc	lo0,cylimb,lo0			C each low word absorbs the high word of the product before it
	addc	lo1,hi0,lo1
	addc	lo2,hi1,lo2
	addc	lo3,hi2,lo3

	addib,<	-4,size_param,L(end)		C fewer than 4 further limbs: skip the main loop
	addc	%r0,hi3,cylimb			C propagate carry into cylimb
C main loop ------------------------------------------------------------------
C Per pass: subtract the four words prepared on the previous pass from the
C words at res_ptr, while the next four products are formed and prepared.
LDEF(loop)
	fldds,ma	8(s1_ptr),%fr4
	fldds,ma	8(s1_ptr),%fr8

	ldws	0(res_ptr),s0
	xmpyu	%fr4L,%fr31R,%fr5
	ldws	4(res_ptr),s1
	xmpyu	%fr4R,%fr31R,%fr6
	ldws	8(res_ptr),s2
	xmpyu	%fr8L,%fr31R,%fr9
	ldws	12(res_ptr),s3
	xmpyu	%fr8R,%fr31R,%fr10

	fstds	%fr5,-16(%r31)
	sub	s0,lo0,s0
	fstds	%fr6,-8(%r31)
	subb	s1,lo1,s1
	fstds	%fr9,0(%r31)
	subb	s2,lo2,s2
	fstds	%fr10,8(%r31)
	subb	s3,lo3,s3
	subb	%r0,%r0,lo0			C these two insns ...
	add	lo0,lo0,%r0			C ... just invert cy (lo0 is scratch here, reloaded below)

	ldws	-16(%r31),hi0
	ldws	-12(%r31),lo0
	ldws	-8(%r31),hi1
	ldws	-4(%r31),lo1
	ldws	0(%r31),hi2
	ldws	4(%r31),lo2
	ldws	8(%r31),hi3
	ldws	12(%r31),lo3

	addc	lo0,cylimb,lo0			C addc chain for the new products, interleaved with the stores
	stws,ma	s0,4(res_ptr)
	addc	lo1,hi0,lo1
	stws,ma	s1,4(res_ptr)
	addc	lo2,hi1,lo2
	stws,ma	s2,4(res_ptr)
	addc	lo3,hi2,lo3
	stws,ma	s3,4(res_ptr)

	addib,>=	-4,size_param,L(loop)	C another full group of 4 remains: loop
	addc	%r0,hi3,cylimb			C propagate carry into cylimb
C finish software pipeline ---------------------------------------------------
C Subtract the last prepared group of four; no new products are formed.
LDEF(end)
	ldws	0(res_ptr),s0
	ldws	4(res_ptr),s1
	ldws	8(res_ptr),s2
	ldws	12(res_ptr),s3

	sub	s0,lo0,s0
	stws,ma	s0,4(res_ptr)
	subb	s1,lo1,s1
	stws,ma	s1,4(res_ptr)
	subb	s2,lo2,s2
	stws,ma	s2,4(res_ptr)
	subb	s3,lo3,s3
	stws,ma	s3,4(res_ptr)
	subb	%r0,%r0,lo0			C these two insns ...
	add	lo0,lo0,%r0			C ... invert cy

C restore callee-saves registers ---------------------------------------------
	ldw	-96(%r30),%r3
	ldw	-92(%r30),%r4
	ldw	-88(%r30),%r5
	ldw	-84(%r30),%r6
	ldw	-80(%r30),%r7

C Reached from the entry branches and by falling through from above.  In
C every case size_param holds the remaining limb count minus 4.
LDEF(few_limbs)
	addib,=,n	4,size_param,L(ret)	C undo the -4 bias; nothing left: return

C One limb per pass.  The product slot here is -16(%r30), the word that held
C s2_limb at entry.
LDEF(loop2)
	fldws,ma	4(s1_ptr),%fr4
	ldws	0(res_ptr),s0
	xmpyu	%fr4,%fr31R,%fr5
	fstds	%fr5,-16(%r30)
	ldws	-16(%r30),hi0
	ldws	-12(%r30),lo0
	addc	lo0,cylimb,lo0			C low word + carry limb + cy
	addc	%r0,hi0,cylimb			C new carry limb = high word + cy
	sub	s0,lo0,s0
	add	s0,lo0,%r0			C invert cy
	stws,ma	s0,4(res_ptr)
	addib,<>	-1,size_param,L(loop2)	C loop until the count reaches zero
	nop					C fills the branch delay slot

LDEF(ret)
	addc	%r0,cylimb,cylimb		C fold the pending cy into the carry limb
	bv	0(%r2)				C branch to the address in %r2 (return)
	ldo	-128(%r30),%r30			C release the frame (branch delay slot)
EPILOGUE(mpn_submul_1)