// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions used throughout this file:
//   - Vector elements are addressed as 4-byte words (indices are
//     scaled with <<2).
//   - Only len(z) is read from the argument frame; the other slices
//     are walked for the same number of words without a length check.
//   - MOVW.P is a post-increment access, MOVW.W a pre-indexed access
//     with writeback.
//   - Label names carry a per-function numeric suffix (L1/E1, L2/E2, ...)
//     so that they are unique within the file.

// func addVV(z, x, y []Word) (c Word)
//
// z[i] = x[i] + y[i] + carry for every i < len(z); c receives the
// carry out of the last addition (0 or 1).
//
// R1 = z cursor, R2 = x cursor, R3 = y cursor, R4 = &z[len(z)],
// R5/R6 = scratch.
TEXT ·addVV(SB),NOSPLIT,$0
	ADD.S	$0, R0		// clear carry flag
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = end of z
	B	E1
L1:
	MOVW.P	4(R2), R5	// R5 = next word of x
	MOVW.P	4(R3), R6	// R6 = next word of y
	ADC.S	R6, R5		// add with carry in, update carry out
	MOVW.P	R5, 4(R1)	// store to z and advance
E1:
	// The loop relies on the carry flag surviving this comparison
	// from one ADC.S to the next.
	TEQ	R1, R4
	BNE	L1

	MOVW	$0, R0
	MOVW.CS	$1, R0		// c = 1 if the carry flag is set
	MOVW	R0, c+36(FP)
	RET


// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBC instead of ADC and label names)
//
// z[i] = x[i] - y[i] - borrow for every i < len(z); c receives the
// final borrow (0 or 1).
TEXT ·subVV(SB),NOSPLIT,$0
	SUB.S	$0, R0		// clear borrow flag
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = end of z
	B	E2
L2:
	MOVW.P	4(R2), R5	// R5 = next word of x
	MOVW.P	4(R3), R6	// R6 = next word of y
	SBC.S	R6, R5		// R5 -= R6 with borrow in, update borrow out
	MOVW.P	R5, 4(R1)	// store to z and advance
E2:
	TEQ	R1, R4
	BNE	L2

	MOVW	$0, R0
	MOVW.CC	$1, R0		// c = 1 if the carry flag is clear (borrow)
	MOVW	R0, c+36(FP)
	RET


// func addVW(z, x []Word, y Word) (c Word)
//
// z = x + y, where y is a single word added at the least significant
// position; c receives the final carry. For len(z) == 0 the routine
// stores y itself into c.
TEXT ·addVW(SB),NOSPLIT,$0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = end of z
	TEQ	R1, R4
	BNE	L3a
	MOVW	R3, c+28(FP)	// empty vector: c = y
	RET
L3a:
	// First word: plain add of y, which establishes the carry flag.
	MOVW.P	4(R2), R5
	ADD.S	R3, R5
	MOVW.P	R5, 4(R1)
	B	E3
L3:
	// Remaining words: propagate the carry only.
	MOVW.P	4(R2), R5
	ADC.S	$0, R5
	MOVW.P	R5, 4(R1)
E3:
	TEQ	R1, R4
	BNE	L3

	MOVW	$0, R0
	MOVW.CS	$1, R0		// c = 1 if the carry flag is set
	MOVW	R0, c+28(FP)
	RET


// func subVW(z, x []Word, y Word) (c Word)
//
// z = x - y, where y is a single word subtracted at the least
// significant position; c receives the final borrow. For len(z) == 0
// the routine stores y itself into c.
TEXT ·subVW(SB),NOSPLIT,$0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = end of z
	TEQ	R1, R4
	BNE	L4a
	MOVW	R3, c+28(FP)	// empty vector: c = y
	RET
L4a:
	// First word: plain subtract of y, which establishes the borrow.
	MOVW.P	4(R2), R5
	SUB.S	R3, R5
	MOVW.P	R5, 4(R1)
	B	E4
L4:
	// Remaining words: propagate the borrow only.
	MOVW.P	4(R2), R5
	SBC.S	$0, R5
	MOVW.P	R5, 4(R1)
E4:
	TEQ	R1, R4
	BNE	L4

	MOVW	$0, R0
	MOVW.CC	$1, R0		// c = 1 if the carry flag is clear (borrow)
	MOVW	R0, c+28(FP)
	RET


// func shlVU(z, x []Word, s uint) (c Word)
//
// z = x << s, processed from the most significant word downwards;
// c receives the bits shifted out of the top word. For len(z) == 0
// c is 0; for s == 0 the words are copied and c is 0.
//
// R1 = stop address in z, R2 = x cursor (walks down), R5 = z cursor
// (walks down), R3 = s, R4 = 32 - s, R6 = current x word,
// R7 = partially assembled result word.
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVW	z_len+4(FP), R5
	TEQ	$0, R5
	BEQ	X7

	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	ADD	R5<<2, R2, R2	// R2 = end of x
	ADD	R5<<2, R1, R5	// R5 = end of z
	MOVW	s+24(FP), R3
	TEQ	$0, R3	// shift 0 is special
	BEQ	Y7
	ADD	$4, R1	// stop one word early
	MOVW	$32, R4
	SUB	R3, R4		// R4 = 32 - s
	MOVW	$0, R7		// R7 is overwritten below before it is read

	// top word: its high bits become the return value
	MOVW.W	-4(R2), R6
	MOVW	R6<<R3, R7
	MOVW	R6>>R4, R6
	MOVW	R6, c+28(FP)
	B	E7

L7:
	MOVW.W	-4(R2), R6
	ORR	R6>>R4, R7	// fill low bits from the next lower word
	MOVW.W	R7, -4(R5)
	MOVW	R6<<R3, R7
E7:
	TEQ	R1, R5
	BNE	L7

	MOVW	R7, -4(R5)	// lowest word: nothing to shift in from below
	RET

Y7:	// copy loop, because shift 0 == shift 32
	MOVW.W	-4(R2), R6
	MOVW.W	R6, -4(R5)
	TEQ	R1, R5
	BNE	Y7

X7:
	MOVW	$0, R1
	MOVW	R1, c+28(FP)
	RET


// func shrVU(z, x []Word, s uint) (c Word)
//
// z = x >> s, processed from the least significant word upwards;
// c receives the bits shifted out of the bottom word (left-aligned).
// For len(z) == 0 c is 0; for s == 0 the words are copied and c is 0.
//
// R1 = z cursor (walks up), R2 = x cursor (walks up), R5 = stop
// address in z, R3 = s, R4 = 32 - s, R6 = current x word,
// R7 = partially assembled result word.
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVW	z_len+4(FP), R5
	TEQ	$0, R5
	BEQ	X6

	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	ADD	R5<<2, R1, R5	// R5 = end of z
	MOVW	s+24(FP), R3
	TEQ	$0, R3	// shift 0 is special
	BEQ	Y6
	SUB	$4, R5	// stop one word early
	MOVW	$32, R4
	SUB	R3, R4		// R4 = 32 - s
	MOVW	$0, R7		// R7 is overwritten below before it is read

	// first word
	MOVW.P	4(R2), R6
	MOVW	R6>>R3, R7
	MOVW	R6<<R4, R6
	MOVW	R6, c+28(FP)
	B	E6

	// word loop
L6:
	MOVW.P	4(R2), R6
	ORR	R6<<R4, R7	// fill high bits from the next higher word
	MOVW.P	R7, 4(R1)
	MOVW	R6>>R3, R7
E6:
	TEQ	R1, R5
	BNE	L6

	MOVW	R7, 0(R1)	// highest word: nothing to shift in from above
	RET

Y6:	// copy loop, because shift 0 == shift 32
	MOVW.P	4(R2), R6
	MOVW.P	R6, 4(R1)
	TEQ	R1, R5
	BNE	Y6

X6:
	MOVW	$0, R1
	MOVW	R1, c+28(FP)
	RET


// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// z = x*y + r, where y and r are single words; c receives the most
// significant (overflow) word. For len(z) == 0, c = r.
//
// R0 = constant 0, R1 = z cursor, R2 = x cursor, R3 = y,
// R4 = running carry word (initially r), R5 = &z[len(z)],
// R7:R6 = 64-bit product.
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVW	$0, R0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R5
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	r+28(FP), R4
	ADD	R5<<2, R1, R5	// R5 = end of z
	B	E8

	// word loop
L8:
	MOVW.P	4(R2), R6
	MULLU	R6, R3, (R7, R6)	// R7:R6 = x[i] * y
	ADD.S	R4, R6		// add incoming carry word to the low half
	ADC	R0, R7		// fold the resulting carry bit into the high half
	MOVW.P	R6, 4(R1)
	MOVW	R7, R4		// high half is the carry word for the next step
E8:
	TEQ	R1, R5
	BNE	L8

	MOVW	R4, c+32(FP)
	RET


// func addMulVVW(z, x []Word, y Word) (c Word)
//
// z += x*y, where y is a single word; c receives the most significant
// (overflow) word. For len(z) == 0, c = 0.
//
// R0 = constant 0, R1 = z cursor, R2 = x cursor, R3 = y,
// R4 = running carry word / scratch for z[i], R5 = &z[len(z)],
// R7:R6 = 64-bit product.
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVW	$0, R0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R5
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R5<<2, R1, R5	// R5 = end of z
	MOVW	$0, R4		// initial carry word
	B	E9

	// word loop
L9:
	MOVW.P	4(R2), R6
	MULLU	R6, R3, (R7, R6)	// R7:R6 = x[i] * y
	ADD.S	R4, R6		// add incoming carry word
	ADC	R0, R7
	MOVW	0(R1), R4	// R4 = current z[i]
	ADD.S	R4, R6		// accumulate into the existing z[i]
	ADC	R0, R7
	MOVW.P	R6, 4(R1)
	MOVW	R7, R4		// high half is the carry word for the next step
E9:
	TEQ	R1, R5
	BNE	L9

	MOVW	R4, c+28(FP)
	RET


// func divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
TEXT ·divWVW(SB),NOSPLIT,$0
	// ARM has no multiword division, so use portable code.
	B	·divWVW_g(SB)


// func divWW(x1, x0, y Word) (q, r Word)
TEXT ·divWW(SB),NOSPLIT,$0
	// ARM has no multiword division, so use portable code.
	B	·divWW_g(SB)


// func mulWW(x, y Word) (z1, z0 Word)
//
// Full 32x32 -> 64 bit unsigned multiply: z1 is the high word and z0
// the low word of x*y.
TEXT ·mulWW(SB),NOSPLIT,$0
	MOVW	x+0(FP), R1
	MOVW	y+4(FP), R2
	MULLU	R1, R2, (R4, R3)	// R4:R3 = x * y
	MOVW	R4, z1+8(FP)
	MOVW	R3, z0+12(FP)
	RET

// func bitLen(x Word) (n int)
//
// n = 32 - (number of leading zero bits in x).
TEXT ·bitLen(SB),NOSPLIT,$0
	MOVW	x+0(FP), R0
	CLZ	R0, R0
	RSB	$32, R0		// R0 = 32 - R0
	MOVW	R0, n+4(FP)
	RET