dnl  Listing source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/pa32/hppa1_1/pa7100/addmul_1.asm

dnl  HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and
dnl  add the result to a second limb vector.

dnl  Copyright 1995, 2000-2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C mpn_addmul_1 -- multiply the size_param limbs at s1_ptr by s2_limb and add
C the products into the limbs at res_ptr.  The carry-out limb is accumulated
C in cylimb (%r28), which is the value left in %r28 when the routine returns.
C
C Shape of the routine:
C   1. fewer than 4 limbs            -> simple one-limb loop (loop2)
C   2. s1_ptr not doubleword aligned -> peel one limb so fldds can be used
C   3. 4-way software pipeline: products for limbs i+4..i+7 are computed
C      while the products for limbs i..i+3 are added into res_ptr
C   4. the 0..3 leftover limbs go through loop2
C
C Frame: 128 bytes, allocated by adding to %r30 on entry and released in the
C delay slot of the return branch.
C   -128..-97(%r30)   four 64-bit product slots, addressed as -16..15(%r31)
C    -96..-77(%r30)   saved %r3-%r7
C    -16..-9(%r30)    s2_limb spill on entry, later the product slot of loop2
C
C The carry bit is kept live across loads, stores and addib; the code relies
C on none of those disturbing it.

C INPUT PARAMETERS
define(`res_ptr',`%r26')
define(`s1_ptr',`%r25')
define(`size_param',`%r24')
define(`s2_limb',`%r23')

C WORKING REGISTERS
C   cylimb    running carry limb between groups of products
C   s0..s3    limbs loaded from res_ptr (s2 and s3 live in saved registers)
C   loN/hiN   low/high 32-bit halves of product N
define(`cylimb',`%r28')
define(`s0',`%r19')
define(`s1',`%r20')
define(`s2',`%r3')
define(`s3',`%r4')
define(`lo0',`%r21')
define(`lo1',`%r5')
define(`lo2',`%r6')
define(`lo3',`%r7')
define(`hi0',`%r22')
define(`hi1',`%r23')	C safe to reuse: s2_limb is copied to %fr31R before hi1 is written
define(`hi2',`%r29')
define(`hi3',`%r1')

ASM_START()
PROLOGUE(mpn_addmul_1)
C	.callinfo	frame=128,no_calls

	ldo	128(%r30),%r30			C allocate the 128-byte frame
	stws	s2_limb,-16(%r30)		C spill the multiplier so it can be loaded into an FP register
	add	%r0,%r0,cylimb			C clear cy and cylimb
	addib,<	-4,size_param,L(few_limbs)	C size_param -= 4; fewer than 4 limbs -> loop2
	fldws	-16(%r30),%fr31R		C (delay slot, runs on both paths) multiplier -> %fr31R

	ldo	-112(%r30),%r31			C %r31 = base for the four product slots
	stw	%r3,-96(%r30)			C save the callee-saves registers used as s2, s3, lo1..lo3
	stw	%r4,-92(%r30)
	stw	%r5,-88(%r30)
	stw	%r6,-84(%r30)
	stw	%r7,-80(%r30)

C Skip the peel when bit 29 of s1_ptr (the bit of weight 4) is clear, i.e. when
C s1_ptr is already aligned for the doubleword fldds loads below.  The ,n
C completer nullifies the following fldws when the branch is taken.
	bb,>=,n	s1_ptr,29,L(0)

C Peel one limb: res[0] += low(s1[0] * s2_limb); the high word becomes cylimb
C and the carry bit of the add is left pending for the next addc.
	fldws,ma 4(s1_ptr),%fr4
	ldws	0(res_ptr),s0
	xmpyu	%fr4,%fr31R,%fr5
	fstds	%fr5,-16(%r31)
	ldws	-16(%r31),cylimb		C high word of the product
	ldws	-12(%r31),lo0			C low word of the product
	add	s0,lo0,s0
	addib,<	-1,size_param,L(few_limbs)	C fewer than 4 limbs left: %r3-%r7 are still untouched, so no restore is needed
	stws,ma	s0,4(res_ptr)			C (delay slot) store the peeled limb

C start software pipeline ----------------------------------------------------
C Multiply the first group of four limbs; nothing is added to res_ptr yet.
LDEF(0)
	fldds,ma 8(s1_ptr),%fr4
	fldds,ma 8(s1_ptr),%fr8

	xmpyu	%fr4L,%fr31R,%fr5
	xmpyu	%fr4R,%fr31R,%fr6
	xmpyu	%fr8L,%fr31R,%fr9
	xmpyu	%fr8R,%fr31R,%fr10

C Move the 64-bit products to the integer side through the stack slots.
	fstds	%fr5,-16(%r31)
	fstds	%fr6,-8(%r31)
	fstds	%fr9,0(%r31)
	fstds	%fr10,8(%r31)

	ldws	-16(%r31),hi0
	ldws	-12(%r31),lo0
	ldws	-8(%r31),hi1
	ldws	-4(%r31),lo1
	ldws	0(%r31),hi2
	ldws	4(%r31),lo2
	ldws	8(%r31),hi3
	ldws	12(%r31),lo3

C Fold each high word (and the pending carry bit) into the next low word, so
C lo0..lo3 are ready to be added to res_ptr[0..3].
	addc	lo0,cylimb,lo0
	addc	lo1,hi0,lo1
	addc	lo2,hi1,lo2
	addc	lo3,hi2,lo3

	addib,<	-4,size_param,L(end)		C no further full group of four: drain the pipeline
	addc	%r0,hi3,cylimb			C propagate carry into cylimb
C main loop ------------------------------------------------------------------
C Each pass multiplies the next four s1 limbs while adding the previous four
C products (lo0..lo3) into res_ptr[0..3].  Integer and FP instructions are
C interleaved, presumably for dual issue on the 7100.  The add of s0 starts a
C fresh carry chain that runs through the s1..s3 adds, on into the lo/hi folds,
C and ends in cylimb.
LDEF(loop)
	fldds,ma 8(s1_ptr),%fr4
	fldds,ma 8(s1_ptr),%fr8

	ldws	0(res_ptr),s0
	xmpyu	%fr4L,%fr31R,%fr5
	ldws	4(res_ptr),s1
	xmpyu	%fr4R,%fr31R,%fr6
	ldws	8(res_ptr),s2
	xmpyu	%fr8L,%fr31R,%fr9
	ldws	12(res_ptr),s3
	xmpyu	%fr8R,%fr31R,%fr10

	fstds	%fr5,-16(%r31)
	add	s0,lo0,s0
	fstds	%fr6,-8(%r31)
	addc	s1,lo1,s1
	fstds	%fr9,0(%r31)
	addc	s2,lo2,s2
	fstds	%fr10,8(%r31)
	addc	s3,lo3,s3

	ldws	-16(%r31),hi0
	ldws	-12(%r31),lo0
	ldws	-8(%r31),hi1
	ldws	-4(%r31),lo1
	ldws	0(%r31),hi2
	ldws	4(%r31),lo2
	ldws	8(%r31),hi3
	ldws	12(%r31),lo3

	addc	lo0,cylimb,lo0
	stws,ma	s0,4(res_ptr)
	addc	lo1,hi0,lo1
	stws,ma	s1,4(res_ptr)
	addc	lo2,hi1,lo2
	stws,ma	s2,4(res_ptr)
	addc	lo3,hi2,lo3
	stws,ma	s3,4(res_ptr)

	addib,>= -4,size_param,L(loop)
	addc	%r0,hi3,cylimb			C propagate carry into cylimb
C finish software pipeline ---------------------------------------------------
C Add the last computed group lo0..lo3 into res_ptr[0..3].  The carry bit of
C the final addc stays pending for loop2 or for the return path.
LDEF(end)
	ldws	0(res_ptr),s0
	ldws	4(res_ptr),s1
	ldws	8(res_ptr),s2
	ldws	12(res_ptr),s3

	add	s0,lo0,s0
	stws,ma	s0,4(res_ptr)
	addc	s1,lo1,s1
	stws,ma	s1,4(res_ptr)
	addc	s2,lo2,s2
	stws,ma	s2,4(res_ptr)
	addc	s3,lo3,s3
	stws,ma	s3,4(res_ptr)

C restore callee-saves registers ---------------------------------------------
	ldw	-96(%r30),%r3
	ldw	-92(%r30),%r4
	ldw	-88(%r30),%r5
	ldw	-84(%r30),%r6
	ldw	-80(%r30),%r7

C Undo the -4 bias on size_param.  Zero limbs left -> return; the ,n completer
C nullifies the first instruction of loop2 when the branch is taken.
LDEF(few_limbs)
	addib,=,n 4,size_param,L(ret)

C One limb per pass, using only caller-saves registers and the slot at
C -16(%r30).  The carry bit of the final add is consumed by the addc of the
C next pass, or by the addc at ret.
LDEF(loop2)
	fldws,ma 4(s1_ptr),%fr4
	ldws	0(res_ptr),s0
	xmpyu	%fr4,%fr31R,%fr5
	fstds	%fr5,-16(%r30)
	ldws	-16(%r30),hi0
	ldws	-12(%r30),lo0
	addc	lo0,cylimb,lo0			C low word + previous carry limb + pending carry bit
	addc	%r0,hi0,cylimb			C new carry limb = high word + carry
	add	s0,lo0,s0
	stws,ma	s0,4(res_ptr)
	addib,<> -1,size_param,L(loop2)
	nop					C (delay slot)

LDEF(ret)
	addc	%r0,cylimb,cylimb		C absorb the last pending carry bit into the result
	bv	0(%r2)				C return through %r2
	ldo	-128(%r30),%r30			C (delay slot) release the frame
EPILOGUE(mpn_addmul_1)