// Extracted from github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/math/big/arith_arm.s
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions used by every routine below:
//   - Slice arguments are read as a base pointer (name+0) and a length in
//     words (name_len+4); words are stepped through 4 bytes at a time.
//   - MOVW.P is a post-indexed access (use the address, then add the offset
//     to the base register); MOVW.W is a pre-indexed access with write-back
//     (add the offset to the base register, then use the address).
//   - Loops are entered by branching to their E<n> label, so the end-of-vector
//     test runs before the first iteration.
//   - CMP overwrites the condition flags, so routines that chain a carry
//     across iterations park the whole CPSR in R0 before the CMP and restore
//     it right before the next ADC/SBC.

#define CFLAG 29	// bit position of carry flag

// func addVV(z, x, y []Word) (c Word)
//
// Stores x[i] + y[i] + carry into z[i] for each of the z_len words and
// returns the final carry (0 or 1) in c.
// R1 = z cursor, R2 = x cursor, R3 = y cursor, R4 = address just past z,
// R0 = saved CPSR carrying the carry bit between iterations.
TEXT ·addVV(SB),7,$0
	MOVW	$0, R0			// saved flags start with carry clear
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	z_len+4(FP), R4
	MOVW	R4<<2, R4		// length in words -> length in bytes
	ADD	R1, R4			// R4 = z + 4*z_len, the loop end address
	B	E1			// test first: z_len == 0 runs no iterations
L1:
	MOVW.P	4(R2), R5		// R5 = next x word, R2 += 4
	MOVW.P	4(R3), R6		// R6 = next y word, R3 += 4
	MOVW	R0, CPSR		// bring back the carry from the previous word
	ADC.S	R6, R5			// R5 = R5 + R6 + carry, flags updated
	MOVW.P	R5, 4(R1)		// store the sum word, R1 += 4
	MOVW	CPSR, R0		// save flags before CMP overwrites them
E1:
	CMP	R1, R4
	BNE	L1

	MOVW	R0>>CFLAG, R0		// move the saved carry bit down to bit 0
	AND	$1, R0			// and drop every other flag bit
	MOVW	R0, c+36(FP)
	RET


// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBC instead of ADC and label names)
//
// Stores x[i] - y[i] - borrow into z[i] and returns the final borrow in c.
// The carry flag is used inverted here: it is seeded as set and the
// extracted bit is flipped before it is returned.
TEXT ·subVV(SB),7,$0
	MOVW	$(1<<CFLAG), R0		// saved flags start with carry set (no borrow)
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	z_len+4(FP), R4
	MOVW	R4<<2, R4		// length in words -> length in bytes
	ADD	R1, R4			// R4 = z + 4*z_len, the loop end address
	B	E2
L2:
	MOVW.P	4(R2), R5		// R5 = next x word, R2 += 4
	MOVW.P	4(R3), R6		// R6 = next y word, R3 += 4
	MOVW	R0, CPSR		// bring back the borrow state from the previous word
	SBC.S	R6, R5			// R5 = R5 - R6 with borrow, flags updated
	MOVW.P	R5, 4(R1)		// store the difference word, R1 += 4
	MOVW	CPSR, R0		// save flags before CMP overwrites them
E2:
	CMP	R1, R4
	BNE	L2

	MOVW	R0>>CFLAG, R0		// move the saved carry bit down to bit 0
	AND	$1, R0
	EOR	$1, R0			// carry clear means borrow, so invert for the result
	MOVW	R0, c+36(FP)
	RET


// func addVW(z, x []Word, y Word) (c Word)
//
// Adds the single word y to the vector x, storing the result in z, and
// returns the final carry in c. With z_len == 0 nothing is stored and c = y.
// The first word uses a plain ADD of y; the remaining words only propagate
// the carry with ADC $0.
TEXT ·addVW(SB),7,$0
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	z_len+4(FP), R4
	MOVW	R4<<2, R4		// length in words -> length in bytes
	ADD	R1, R4			// R4 = z + 4*z_len, the loop end address
	CMP	R1, R4
	BNE	L3a
	MOVW	R3, c+28(FP)		// empty vector: y comes back unchanged as the carry
	RET
L3a:
	MOVW.P	4(R2), R5		// first word of x
	ADD.S	R3, R5			// R5 = x[0] + y, flags updated
	MOVW.P	R5, 4(R1)
	MOVW	CPSR, R0		// save flags before CMP overwrites them
	B	E3
L3:
	MOVW.P	4(R2), R5
	MOVW	R0, CPSR		// bring back the carry from the previous word
	ADC.S	$0, R5			// R5 = x[i] + carry
	MOVW.P	R5, 4(R1)
	MOVW	CPSR, R0
E3:
	CMP	R1, R4
	BNE	L3

	MOVW	R0>>CFLAG, R0		// move the saved carry bit down to bit 0
	AND	$1, R0
	MOVW	R0, c+28(FP)
	RET


// func subVW(z, x []Word, y Word) (c Word)
//
// Subtracts the single word y from the vector x, storing the result in z,
// and returns the final borrow in c. With z_len == 0 nothing is stored and
// c = y. Same structure as addVW, with SUB/SBC and an inverted carry bit.
TEXT ·subVW(SB),7,$0
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	z_len+4(FP), R4
	MOVW	R4<<2, R4		// length in words -> length in bytes
	ADD	R1, R4			// R4 = z + 4*z_len, the loop end address
	CMP	R1, R4
	BNE	L4a
	MOVW	R3, c+28(FP)		// empty vector: y comes back unchanged as the borrow
	RET
L4a:
	MOVW.P	4(R2), R5		// first word of x
	SUB.S	R3, R5			// R5 = x[0] - y, flags updated
	MOVW.P	R5, 4(R1)
	MOVW	CPSR, R0		// save flags before CMP overwrites them
	B	E4
L4:
	MOVW.P	4(R2), R5
	MOVW	R0, CPSR		// bring back the borrow state from the previous word
	SBC.S	$0, R5			// R5 = x[i] - borrow
	MOVW.P	R5, 4(R1)
	MOVW	CPSR, R0
E4:
	CMP	R1, R4
	BNE	L4

	MOVW	R0>>CFLAG, R0		// move the saved carry bit down to bit 0
	AND	$1, R0
	EOR	$1, R0			// carry clear means borrow, so invert for the result
	MOVW	R0, c+28(FP)
	RET


// func shlVU(z, x []Word, s uint) (c Word)
//
// Shifts the vector x left by s bits into z, walking from the highest word
// down to the lowest, and returns in c the bits shifted out of the top word
// (x[len-1] >> (32-s)). c is 0 when z_len == 0 or s == 0.
// s is not range-checked here beyond the s == 0 special case.
// R1 = z (plus 4 on the shift path), R2 = x cursor, R5 = z cursor (both
// start just past the end), R3 = s, R4 = 32-s, R6 = current input word,
// R7 = output word under construction.
TEXT ·shlVU(SB),7,$0
	MOVW	z_len+4(FP), R5
	CMP	$0, R5
	BEQ	X7

	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	R5<<2, R5		// length in words -> length in bytes
	ADD	R5, R2			// R2 = address just past the last word of x
	ADD	R1, R5			// R5 = address just past the last word of z
	MOVW	s+24(FP), R3
	CMP	$0, R3	// shift 0 is special
	BEQ	Y7
	ADD	$4, R1	// stop one word early
	MOVW	$32, R4
	SUB	R3, R4			// R4 = 32 - s
	MOVW	$0, R7			// NOTE(review): looks redundant, R7 is overwritten two instructions below before it is read

	MOVW.W	-4(R2), R6		// R2 -= 4, R6 = top word of x
	MOVW	R6<<R3, R7		// high part of the top output word
	MOVW	R6>>R4, R6		// bits that fall off the top
	MOVW	R6, c+28(FP)
	B	E7

L7:
	MOVW.W	-4(R2), R6		// R2 -= 4, R6 = next lower word of x
	ORR	R6>>R4, R7		// fill the low bits from the next lower word
	MOVW.W	R7, -4(R5)		// R5 -= 4, store the finished output word
	MOVW	R6<<R3, R7		// start the next output word
E7:
	CMP	R1, R5
	BNE	L7

	MOVW	R7, -4(R5)		// R5 == z+4 here, so this writes the lowest word of z
	RET

Y7:	// copy loop, because shift 0 == shift 32
	MOVW.W	-4(R2), R6
	MOVW.W	R6, -4(R5)
	CMP	R1, R5
	BNE	Y7

X7:
	MOVW	$0, R1
	MOVW	R1, c+28(FP)		// nothing was shifted out
	RET


// func shrVU(z, x []Word, s uint) (c Word)
//
// Shifts the vector x right by s bits into z, walking from the lowest word
// up to the highest, and returns in c the bits shifted out of the bottom
// word (x[0] << (32-s)). c is 0 when z_len == 0 or s == 0.
// s is not range-checked here beyond the s == 0 special case.
// R1 = z cursor, R2 = x cursor, R5 = end of z (minus 4 on the shift path),
// R3 = s, R4 = 32-s, R6 = current input word, R7 = output word under
// construction.
TEXT ·shrVU(SB),7,$0
	MOVW	z_len+4(FP), R5
	CMP	$0, R5
	BEQ	X6

	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	R5<<2, R5		// length in words -> length in bytes
	ADD	R1, R5			// R5 = address just past the last word of z
	MOVW	s+24(FP), R3
	CMP	$0, R3	// shift 0 is special
	BEQ	Y6
	SUB	$4, R5	// stop one word early
	MOVW	$32, R4
	SUB	R3, R4			// R4 = 32 - s
	MOVW	$0, R7			// NOTE(review): looks redundant, R7 is overwritten two instructions below before it is read

	// first word
	MOVW.P	4(R2), R6		// R6 = x[0], R2 += 4
	MOVW	R6>>R3, R7		// low part of the first output word
	MOVW	R6<<R4, R6		// bits that fall off the bottom
	MOVW	R6, c+28(FP)
	B	E6

	// word loop
L6:
	MOVW.P	4(R2), R6		// R6 = next higher word of x, R2 += 4
	ORR	R6<<R4, R7		// fill the high bits from the next higher word
	MOVW.P	R7, 4(R1)		// store the finished output word, R1 += 4
	MOVW	R6>>R3, R7		// start the next output word
E6:
	CMP	R1, R5
	BNE	L6

	MOVW	R7, 0(R1)		// R1 is at the last word of z here
	RET

Y6:	// copy loop, because shift 0 == shift 32
	MOVW.P	4(R2), R6
	MOVW.P	R6, 4(R1)
	CMP	R1, R5
	BNE	Y6

X6:
	MOVW	$0, R1
	MOVW	R1, c+28(FP)		// nothing was shifted out
	RET


// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// For each word stores the low half of x[i]*y + carry into z[i]; the high
// half (plus any overflow of the addition) becomes the carry into the next
// word. The carry starts as r and its final value is returned in c.
// R0 = constant 0, R3 = y, R4 = running carry, R7:R6 = 64-bit product.
TEXT ·mulAddVWW(SB),7,$0
	MOVW	$0, R0			// zero operand for the ADC below
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	r+28(FP), R4		// initial carry
	MOVW	z_len+4(FP), R5
	MOVW	R5<<2, R5		// length in words -> length in bytes
	ADD	R1, R5			// R5 = z + 4*z_len, the loop end address
	B	E8

	// word loop
L8:
	MOVW.P	4(R2), R6		// R6 = next x word, R2 += 4
	MULLU	R6, R3, (R7, R6)	// R7:R6 = x[i] * y (R7 high word, R6 low word)
	ADD.S	R4, R6			// low word += carry-in, setting the carry flag
	ADC	R0, R7			// fold that carry flag into the high word
	MOVW.P	R6, 4(R1)		// store the low word, R1 += 4
	MOVW	R7, R4			// high word is the carry into the next word
E8:
	CMP	R1, R5
	BNE	L8

	MOVW	R4, c+32(FP)
	RET


// func addMulVVW(z, x []Word, y Word) (c Word)
//
// For each word adds x[i]*y plus the running carry to the existing z[i],
// stores the low half back into z[i] and carries the high half into the
// next word. The carry starts at 0 and its final value is returned in c.
// R0 = constant 0, R3 = y, R4 = running carry (also reused as scratch for
// the old z[i]), R7:R6 = 64-bit accumulator.
TEXT ·addMulVVW(SB),7,$0
	MOVW	$0, R0			// zero operand for the ADCs below
	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	z_len+4(FP), R5
	MOVW	R5<<2, R5		// length in words -> length in bytes
	ADD	R1, R5			// R5 = z + 4*z_len, the loop end address
	MOVW	$0, R4			// initial carry
	B	E9

	// word loop
L9:
	MOVW.P	4(R2), R6		// R6 = next x word, R2 += 4
	MULLU	R6, R3, (R7, R6)	// R7:R6 = x[i] * y (R7 high word, R6 low word)
	ADD.S	R4, R6			// low word += carry-in, setting the carry flag
	ADC	R0, R7			// fold that carry flag into the high word
	MOVW	0(R1), R4		// R4 = current z[i]; the carry-in is already consumed
	ADD.S	R4, R6			// low word += z[i], setting the carry flag
	ADC	R0, R7			// fold that carry flag into the high word
	MOVW.P	R6, 4(R1)		// store the low word, R1 += 4
	MOVW	R7, R4			// high word is the carry into the next word
E9:
	CMP	R1, R5
	BNE	L9

	MOVW	R4, c+28(FP)
	RET


// func divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
//
// No assembly implementation: control is handed to divWVW_g with a plain
// branch (B, not BL), so execution does not come back to this stub.
TEXT ·divWVW(SB),7,$0
	// ARM has no multiword division, so use portable code.
	B	·divWVW_g(SB)


// func divWW(x1, x0, y Word) (q, r Word)
//
// No assembly implementation: control is handed to divWW_g with a plain
// branch (B, not BL), so execution does not come back to this stub.
TEXT ·divWW(SB),7,$0
	// ARM has no multiword division, so use portable code.
	B	·divWW_g(SB)


// func mulWW(x, y Word) (z1, z0 Word)
//
// Full 32x32 -> 64-bit unsigned multiply: z1 receives the high word of
// x*y and z0 the low word.
TEXT ·mulWW(SB),7,$0
	MOVW	x+0(FP), R1
	MOVW	y+4(FP), R2
	MULLU	R1, R2, (R4, R3)	// R4:R3 = x * y (R4 high word, R3 low word)
	MOVW	R4, z1+8(FP)
	MOVW	R3, z0+12(FP)
	RET

// func bitLen(x Word) (n int)
//
// Returns n = 32 minus the number of leading zero bits in x.
TEXT ·bitLen(SB),7,$0
	MOVW	x+0(FP), R0
	CLZ	R0, R0			// R0 = count of leading zero bits in x
	MOVW	$32, R1
	SUB.S	R0, R1			// R1 = 32 - R0
	MOVW	R1, n+4(FP)
	RET