; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.

; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of either:
;
;   * the GNU Lesser General Public License as published by the Free
;     Software Foundation; either version 3 of the License, or (at your
;     option) any later version.
;
; or
;
;   * the GNU General Public License as published by the Free Software
;     Foundation; either version 2 of the License, or (at your option) any
;     later version.
;
; or both in parallel, as here.
;
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
; for more details.
;
; You should have received copies of the GNU General Public License and the
; GNU Lesser General Public License along with the GNU MP Library.  If not,
; see https://www.gnu.org/licenses/.


; C equivalent:  mp_limb_t __gmpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr,
;                                        mp_srcptr s2_ptr, mp_size_t size)
; Returns the carry out of the most significant limb (0 or 1) in r2.
;
; Carry is threaded through the whole vector with the mc88110 carry
; suffixes:  .co = carry out only, .cio = carry in and out, .ci = carry in.
; Nothing between consecutive carry ops below touches the carry flag.
;
; Three code paths are selected on the relative 8-byte alignment of the
; pointers, so that the 64-bit ld.d/st.d forms can be used in the loops:
;   V1a: s2_ptr and res_ptr identically aligned  -> ld.d + st.d
;   V1b: s1_ptr and res_ptr identically aligned  -> swap s1/s2, reuse V1a
;   V2:  s1_ptr and s2_ptr identically aligned (to each other, but not to
;        res_ptr)                                -> ld.d + two word st

; INPUT PARAMETERS
#define res_ptr	r2
#define s1_ptr	r3
#define s2_ptr	r4
#define size	r5

#include "sysdep.h"

	text
	align	 16
	global	 C_SYMBOL_NAME(__gmpn_add_n)
C_SYMBOL_NAME(__gmpn_add_n):
	addu.co	 r0,r0,r0		; clear cy flag
	xor	 r12,s2_ptr,res_ptr	; compare alignment of s2_ptr/res_ptr
	bb1	 2,r12,L1		; bit 2 differs -> not both 8-aligned
; **  V1a  **
L0:	bb0	 2,res_ptr,L_v1		; branch if res_ptr is aligned?
/* Add least significant limb separately to align res_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	addu.co	 r6,r10,r8		; first add: carry out only
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4
L_v1:	cmp	 r12,size,2
	bb1	 lt,r12,Lend2		; fewer than 2 limbs left: shared tail

	ld	 r10,s1_ptr,0		; software-pipeline: preload 2 limbs
	ld	 r12,s1_ptr,4
	ld.d	 r8,s2_ptr,0		; ld.d loads the r8/r9 pair
	subu	 size,size,10		; bias = 8 (loop block) + 2 (preload)
	bcnd	 lt0,size,Lfin1
/* Add blocks of 8 limbs until less than 8 limbs remain */
	align	 8
Loop1:	subu	 size,size,8
	addu.cio r6,r10,r8		; adds interleaved with next loads to
	ld	 r10,s1_ptr,8		;  hide load latency; carry preserved
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0		; st.d stores the r6/r7 pair
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,16
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,20
	ld.d	 r8,s2_ptr,16
	st.d	 r6,res_ptr,8
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,24
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,28
	ld.d	 r8,s2_ptr,24
	st.d	 r6,res_ptr,16
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,32		; preload for next iteration
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,36
	addu	 s1_ptr,s1_ptr,32
	ld.d	 r8,s2_ptr,32
	addu	 s2_ptr,s2_ptr,32
	st.d	 r6,res_ptr,24
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop1

Lfin1:	addu	 size,size,8-2		; undo loop bias; keep 2-limb preload bias
	bcnd	 lt0,size,Lend1
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1:	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,8
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope1
Lend1:	addu.cio r6,r10,r8		; drain the 2 preloaded limbs
	addu.cio r7,r12,r9
	st.d	 r6,res_ptr,0

	bb0	 0,size,Lret1		; bit 0 of size: one odd limb left?
/* Add last limb */
	ld	 r10,s1_ptr,8
	ld	 r8,s2_ptr,8
	addu.cio r6,r10,r8
	st	 r6,res_ptr,8

Lret1:	jmp.n	 r1			; delayed return; next insn executes
	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb

L1:	xor	 r12,s1_ptr,res_ptr	; compare alignment of s1_ptr/res_ptr
	bb1	 2,r12,L2
; **  V1b  **
	or	 r12,r0,s2_ptr		; swap s1_ptr <-> s2_ptr (addition is
	or	 s2_ptr,r0,s1_ptr	;  commutative), then use the V1a path
	or	 s1_ptr,r0,r12
	br	 L0

; **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of s1_ptr and s2_ptr are the same.  */

L2:	cmp	 r12,size,1
	bb1	 eq,r12,Ljone		; size == 1: single-limb shortcut
	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is aligned
/* Add least significant limb separately to align s1_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	addu.co	 r6,r10,r8		; first add: carry out only
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4

L_v2:	subu	 size,size,8
	bcnd	 lt0,size,Lfin2
/* Add blocks of 8 limbs until less than 8 limbs remain */
	align	 8
Loop2:	subu	 size,size,8
	ld.d	 r8,s1_ptr,0		; paired loads from both sources,
	ld.d	 r6,s2_ptr,0		;  but word stores (res_ptr unaligned)
	addu.cio r8,r8,r6
	st	 r8,res_ptr,0
	addu.cio r9,r9,r7
	st	 r9,res_ptr,4
	ld.d	 r8,s1_ptr,8
	ld.d	 r6,s2_ptr,8
	addu.cio r8,r8,r6
	st	 r8,res_ptr,8
	addu.cio r9,r9,r7
	st	 r9,res_ptr,12
	ld.d	 r8,s1_ptr,16
	ld.d	 r6,s2_ptr,16
	addu.cio r8,r8,r6
	st	 r8,res_ptr,16
	addu.cio r9,r9,r7
	st	 r9,res_ptr,20
	ld.d	 r8,s1_ptr,24
	ld.d	 r6,s2_ptr,24
	addu.cio r8,r8,r6
	st	 r8,res_ptr,24
	addu.cio r9,r9,r7
	st	 r9,res_ptr,28
	addu	 s1_ptr,s1_ptr,32
	addu	 s2_ptr,s2_ptr,32
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop2

Lfin2:	addu	 size,size,8-2		; undo loop bias, leave 2-limb step bias
	bcnd	 lt0,size,Lend2
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope2:	ld.d	 r8,s1_ptr,0
	ld.d	 r6,s2_ptr,0
	addu.cio r8,r8,r6
	st	 r8,res_ptr,0
	addu.cio r9,r9,r7
	st	 r9,res_ptr,4
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope2
Lend2:	bb0	 0,size,Lret2		; bit 0 of size: one odd limb left?
/* Add last limb */
Ljone:	ld	 r10,s1_ptr,0
	ld	 r8,s2_ptr,0
	addu.cio r6,r10,r8		; .cio: carry was cleared at entry, so
	st	 r6,res_ptr,0		;  this is safe on the Ljone shortcut too

Lret2:	jmp.n	 r1			; delayed return; next insn executes
	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb