// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "../../../cmd/ld/textflag.h"

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions used throughout this file:
//   - A Word is 4 bytes here: element pointers advance by 4 and
//     lengths are scaled with <<2.
//   - A []Word argument occupies 12 bytes of the argument frame
//     (pointer at +0, length at +4), which is why the second slice
//     starts at 12(FP) and the following argument at 24(FP).
//   - Vector loops walk a pointer towards a precomputed end pointer
//     and test for equality with CMP. CMP overwrites the condition
//     flags, so loops that chain a carry between iterations keep a
//     copy of CPSR in R0 and reload it before each ADC/SBC.
//   - All functions are NOSPLIT with a zero-size local frame.

#define CFLAG 29	// bit position of carry flag

// func addVV(z, x, y []Word) (c Word)
//
// z[i] = x[i] + y[i] with carry propagation for i in [0, len(z));
// c receives the final carry (0 or 1).
//
// R0 = saved CPSR (starts at 0, i.e. carry clear)
// R1 = z cursor, R2 = x cursor, R3 = y cursor
// R4 = end of z (z + 4*len(z))
// R5, R6 = current x and y words
TEXT ·addVV(SB),NOSPLIT,$0
	MOVW	$0, R0
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	z_len+4(FP), R4
	MOVW	R4<<2, R4	// length in bytes
	ADD	R1, R4		// R4 = end pointer
	B E1
L1:
	MOVW.P	4(R2), R5	// R5 = *x++
	MOVW.P	4(R3), R6	// R6 = *y++
	MOVW	R0, CPSR	// restore carry from previous iteration
	ADC.S	R6, R5		// R5 += R6 + carry, updating flags
	MOVW.P	R5, 4(R1)	// *z++ = R5
	MOVW	CPSR, R0	// save carry; the CMP below clobbers flags
E1:
	CMP	R1, R4
	BNE L1

	MOVW	R0>>CFLAG, R0	// extract the carry bit from the saved CPSR
	AND	$1, R0
	MOVW	R0, c+36(FP)
	RET


// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBC instead of ADC and label names)
//
// z[i] = x[i] - y[i] with borrow propagation; c receives the final
// borrow (0 or 1). On ARM the carry flag is the inverse of borrow for
// subtraction, so the saved CPSR starts with the carry bit set
// (no borrow) and the result is inverted before it is returned.
TEXT ·subVV(SB),NOSPLIT,$0
	MOVW	$(1<<CFLAG), R0	// carry set == no incoming borrow
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	z_len+4(FP), R4
	MOVW	R4<<2, R4	// length in bytes
	ADD	R1, R4		// R4 = end pointer
	B E2
L2:
	MOVW.P	4(R2), R5	// R5 = *x++
	MOVW.P	4(R3), R6	// R6 = *y++
	MOVW	R0, CPSR	// restore carry from previous iteration
	SBC.S	R6, R5		// R5 -= R6 + borrow, updating flags
	MOVW.P	R5, 4(R1)	// *z++ = R5
	MOVW	CPSR, R0	// save carry; the CMP below clobbers flags
E2:
	CMP	R1, R4
	BNE L2

	MOVW	R0>>CFLAG, R0	// extract the carry bit from the saved CPSR
	AND	$1, R0
	EOR	$1, R0		// borrow = !carry
	MOVW	R0, c+36(FP)
	RET


// func addVW(z, x []Word, y Word) (c Word)
//
// z = x + y, where y is a single word added into the lowest element
// and the carry is rippled through the remaining elements; c receives
// the final carry. For len(z) == 0 the result is c = y.
//
// R0 = saved CPSR, R1 = z cursor, R2 = x cursor, R3 = y
// R4 = end of z, R5 = current word
TEXT ·addVW(SB),NOSPLIT,$0
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	z_len+4(FP), R4
	MOVW	R4<<2, R4	// length in bytes
	ADD	R1, R4		// R4 = end pointer
	CMP	R1, R4
	BNE L3a
	MOVW	R3, c+28(FP)	// empty vector: carry out is y itself
	RET
L3a:
	// first word: plain add of y, which produces the initial carry
	MOVW.P	4(R2), R5
	ADD.S	R3, R5
	MOVW.P	R5, 4(R1)
	MOVW	CPSR, R0
	B E3
L3:
	// remaining words: add only the carry
	MOVW.P	4(R2), R5
	MOVW	R0, CPSR	// restore carry from previous iteration
	ADC.S	$0, R5
	MOVW.P	R5, 4(R1)
	MOVW	CPSR, R0	// save carry; the CMP below clobbers flags
E3:
	CMP	R1, R4
	BNE L3

	MOVW	R0>>CFLAG, R0	// extract the carry bit from the saved CPSR
	AND	$1, R0
	MOVW	R0, c+28(FP)
	RET


// func subVW(z, x []Word, y Word) (c Word)
//
// z = x - y, where y is a single word subtracted from the lowest
// element and the borrow is rippled through the remaining elements;
// c receives the final borrow. For len(z) == 0 the result is c = y.
// Register roles are the same as in addVW.
TEXT ·subVW(SB),NOSPLIT,$0
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	z_len+4(FP), R4
	MOVW	R4<<2, R4	// length in bytes
	ADD	R1, R4		// R4 = end pointer
	CMP	R1, R4
	BNE L4a
	MOVW	R3, c+28(FP)	// empty vector: borrow out is y itself
	RET
L4a:
	// first word: plain subtract of y, which produces the initial carry
	MOVW.P	4(R2), R5
	SUB.S	R3, R5
	MOVW.P	R5, 4(R1)
	MOVW	CPSR, R0
	B E4
L4:
	// remaining words: subtract only the borrow
	MOVW.P	4(R2), R5
	MOVW	R0, CPSR	// restore carry from previous iteration
	SBC.S	$0, R5
	MOVW.P	R5, 4(R1)
	MOVW	CPSR, R0	// save carry; the CMP below clobbers flags
E4:
	CMP	R1, R4
	BNE L4

	MOVW	R0>>CFLAG, R0	// extract the carry bit from the saved CPSR
	AND	$1, R0
	EOR	$1, R0		// borrow = !carry
	MOVW	R0, c+28(FP)
	RET


// func shlVU(z, x []Word, s uint) (c Word)
//
// z = x << s, processed from the most significant word down to the
// least significant one; c receives the bits shifted out of the top
// word. Returns c = 0 when len(z) == 0 or s == 0 (for s == 0 the
// words are simply copied).
//
// R1 = z (later z+4, the loop stop address)
// R2 = x cursor, starts at the end of x and moves down
// R5 = z cursor, starts at the end of z and moves down
// R3 = s, R4 = 32-s
// R6 = current x word, R7 = partially assembled output word
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVW	z_len+4(FP), R5
	CMP	$0, R5
	BEQ	X7

	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	R5<<2, R5	// length in bytes
	ADD	R5, R2		// R2 = end of x
	ADD	R1, R5		// R5 = end of z
	MOVW	s+24(FP), R3
	CMP	$0, R3	// shift 0 is special
	BEQ	Y7
	ADD	$4, R1	// stop one word early
	MOVW	$32, R4
	SUB	R3, R4		// R4 = 32 - s
	// NOTE(review): R7 is overwritten two instructions below before it
	// is read, so this clear looks redundant.
	MOVW	$0, R7

	// top word: its high bits are the result c
	MOVW.W	-4(R2), R6
	MOVW	R6<<R3, R7
	MOVW	R6>>R4, R6
	MOVW	R6, c+28(FP)
	B E7

L7:
	MOVW.W	-4(R2), R6	// next lower x word
	ORR	R6>>R4, R7	// its high bits complete the pending word
	MOVW.W	R7, -4(R5)
	MOVW	R6<<R3, R7	// its low bits start the next output word
E7:
	CMP	R1, R5
	BNE	L7

	MOVW	R7, -4(R5)	// lowest output word
	RET

Y7:	// copy loop, because shift 0 == shift 32
	MOVW.W	-4(R2), R6
	MOVW.W	R6, -4(R5)
	CMP	R1, R5
	BNE	Y7

X7:
	MOVW	$0, R1
	MOVW	R1, c+28(FP)
	RET


// func shrVU(z, x []Word, s uint) (c Word)
//
// z = x >> s, processed from the least significant word up to the
// most significant one; c receives the bits shifted out of the bottom
// word, left-aligned. Returns c = 0 when len(z) == 0 or s == 0 (for
// s == 0 the words are simply copied).
//
// R1 = z cursor, R2 = x cursor (both move up)
// R5 = end of z (later end-4, the loop stop address)
// R3 = s, R4 = 32-s
// R6 = current x word, R7 = partially assembled output word
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVW	z_len+4(FP), R5
	CMP	$0, R5
	BEQ	X6

	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	R5<<2, R5	// length in bytes
	ADD	R1, R5		// R5 = end of z
	MOVW	s+24(FP), R3
	CMP	$0, R3	// shift 0 is special
	BEQ	Y6
	SUB	$4, R5	// stop one word early
	MOVW	$32, R4
	SUB	R3, R4		// R4 = 32 - s
	// NOTE(review): R7 is overwritten two instructions below before it
	// is read, so this clear looks redundant.
	MOVW	$0, R7

	// first word
	MOVW.P	4(R2), R6
	MOVW	R6>>R3, R7
	MOVW	R6<<R4, R6
	MOVW	R6, c+28(FP)
	B E6

	// word loop
L6:
	MOVW.P	4(R2), R6	// next higher x word
	ORR	R6<<R4, R7	// its low bits complete the pending word
	MOVW.P	R7, 4(R1)
	MOVW	R6>>R3, R7	// its high bits start the next output word
E6:
	CMP	R1, R5
	BNE	L6

	MOVW	R7, 0(R1)	// highest output word
	RET

Y6:	// copy loop, because shift 0 == shift 32
	MOVW.P	4(R2), R6
	MOVW.P	R6, 4(R1)
	CMP	R1, R5
	BNE Y6

X6:
	MOVW	$0, R1
	MOVW	R1, c+28(FP)
	RET


// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// z = x*y + r, one word at a time; c receives the final high word.
//
// R0 = constant 0 (so that ADC R0, R7 adds only the carry)
// R1 = z cursor, R2 = x cursor, R3 = y
// R4 = running carry word (starts as r), R5 = end of z
// R7:R6 = 64-bit product of the current x word and y
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVW	$0, R0
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	r+28(FP), R4
	MOVW	z_len+4(FP), R5
	MOVW	R5<<2, R5	// length in bytes
	ADD	R1, R5		// R5 = end pointer
	B E8

	// word loop
L8:
	MOVW.P	4(R2), R6
	MULLU	R6, R3, (R7, R6)	// R7:R6 = x[i] * y
	ADD.S	R4, R6			// add incoming carry word to the low half
	ADC	R0, R7			// propagate into the high half
	MOVW.P	R6, 4(R1)
	MOVW	R7, R4			// high half is the next carry word
E8:
	CMP	R1, R5
	BNE	L8

	MOVW	R4, c+32(FP)
	RET


// func addMulVVW(z, x []Word, y Word) (c Word)
//
// z += x*y, one word at a time; c receives the final high word.
//
// R0 = constant 0 (so that ADC R0, R7 adds only the carry)
// R1 = z cursor, R2 = x cursor, R3 = y
// R4 = running carry word (starts at 0; also used to hold z[i])
// R5 = end of z
// R7:R6 = 64-bit product of the current x word and y
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVW	$0, R0
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	z_len+4(FP), R5
	MOVW	R5<<2, R5	// length in bytes
	ADD	R1, R5		// R5 = end pointer
	MOVW	$0, R4
	B E9

	// word loop
L9:
	MOVW.P	4(R2), R6
	MULLU	R6, R3, (R7, R6)	// R7:R6 = x[i] * y
	ADD.S	R4, R6			// add incoming carry word
	ADC	R0, R7
	MOVW	0(R1), R4		// R4 = z[i]
	ADD.S	R4, R6			// add the existing z word
	ADC	R0, R7
	MOVW.P	R6, 4(R1)
	MOVW	R7, R4			// high half is the next carry word
E9:
	CMP	R1, R5
	BNE	L9

	MOVW	R4, c+28(FP)
	RET


// func divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
TEXT ·divWVW(SB),NOSPLIT,$0
	// ARM has no multiword division, so use portable code.
	B ·divWVW_g(SB)


// func divWW(x1, x0, y Word) (q, r Word)
TEXT ·divWW(SB),NOSPLIT,$0
	// ARM has no multiword division, so use portable code.
	B ·divWW_g(SB)


// func mulWW(x, y Word) (z1, z0 Word)
//
// Full unsigned product of x and y: z1 receives the high word and
// z0 the low word.
TEXT ·mulWW(SB),NOSPLIT,$0
	MOVW	x+0(FP), R1
	MOVW	y+4(FP), R2
	MULLU	R1, R2, (R4, R3)	// R4:R3 = x * y
	MOVW	R4, z1+8(FP)
	MOVW	R3, z0+12(FP)
	RET

// func bitLen(x Word) (n int)
//
// n = 32 - (number of leading zero bits of x).
TEXT ·bitLen(SB),NOSPLIT,$0
	MOVW	x+0(FP), R0
	CLZ 	R0, R0
	MOVW	$32, R1
	SUB.S	R0, R1
	MOVW	R1, n+4(FP)
	RET