// Source: github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/runtime/vlop_arm.s
//
// Inferno's libkern/vlop-arm.s
// http://code.google.com/p/inferno-os/source/browse/libkern/vlop-arm.s
//
//         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
//         Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "zasm_GOOS_GOARCH.h"
#include "../../cmd/ld/textflag.h"

// Register number used (via R(arg)) as the result pointer in _mulv.
arg=0

/* replaced use of R10 by R11 because the former can be the data segment base register */

// _mulv multiplies two 64-bit values passed on the stack as (lo, hi) word
// pairs and stores the low 64 bits of the product through the pointer
// loaded from 0(FP).
//
// In:    0(FP)  = pointer to the 8-byte result
//        4(FP)  = l0,  8(FP) = h0   (first operand)
//        12(FP) = l1, 16(FP) = h1   (second operand)
// Out:   0(result) = low word, 4(result) = high word
// Clobb: R0, R2, R4-R8, R11
TEXT _mulv(SB), NOSPLIT, $0
	MOVW	0(FP), R0
	MOVW	4(FP), R2	/* l0 */
	MOVW	8(FP), R11	/* h0 */
	MOVW	12(FP), R4	/* l1 */
	MOVW	16(FP), R5	/* h1 */
	MULLU	R4, R2, (R7,R6)	// R7:R6 = l0 * l1 (full 64-bit product)
	MUL	R11, R4, R8	// cross term h0 * l1 only affects the high word
	ADD	R8, R7
	MUL	R2, R5, R8	// cross term l0 * h1 only affects the high word
	ADD	R8, R7
	MOVW	R6, 0(R(arg))
	MOVW	R7, 4(R(arg))
	RET

// trampoline for _sfloat2. passes LR as arg0 and
// saves registers R0-R13 and CPSR on the stack. R0-R12 and CPSR flags can
// be changed by _sfloat2.
//
// Frame layout, as established by the stores below (offsets from R13):
//    0      written with the value returned by _sfloat2 just before RET
//           (presumably the saved-LR slot RET returns through; confirm
//           against the assembler's frame convention)
//    4      arg0 for _sfloat2: the LR on entry
//    8      saved R0
//   12..56  saved R1-R12
//   60      R13 corrected for the frame size
//   64      saved CPSR
TEXT _sfloat(SB), NOSPLIT, $64-0 // 4 arg + 14*4 saved regs + cpsr
	MOVW	R14, 4(R13)
	MOVW	R0, 8(R13)
	MOVW	$12(R13), R0
	MOVM.IA.W	[R1-R12], (R0)
	MOVW	$68(R13), R1 // correct for frame size
	MOVW	R1, 60(R13)
	WORD	$0xe10f1000 // mrs r1, cpsr
	MOVW	R1, 64(R13)
	// Disable preemption of this goroutine during _sfloat2 by
	// m->locks++ and m->locks-- around the call.
	// Rescheduling this goroutine may cause the loss of the
	// contents of the software floating point registers in
	// m->freghi, m->freglo, m->fflag, if the goroutine is moved
	// to a different m or another goroutine runs on this m.
	// Rescheduling at ordinary function calls is okay because
	// all registers are caller save, but _sfloat2 and the things
	// that it runs are simulating the execution of individual
	// program instructions, and those instructions do not expect
	// the floating point registers to be lost.
	// An alternative would be to move the software floating point
	// registers into G, but they do not need to be kept at the
	// usual places a goroutine reschedules (at function calls),
	// so it would be a waste of 132 bytes per G.
	MOVW	m_locks(m), R1
	ADD	$1, R1
	MOVW	R1, m_locks(m)
	BL	runtime·_sfloat2(SB)
	MOVW	m_locks(m), R1
	SUB	$1, R1
	MOVW	R1, m_locks(m)
	MOVW	R0, 0(R13)	// _sfloat2's result becomes the address RET resumes at (see frame note above)
	MOVW	64(R13), R1
	WORD	$0xe128f001	// msr cpsr_f, r1
	MOVW	$12(R13), R0
	// Restore R1-R8 and R11-R12, but ignore the saved R9 (m) and R10 (g).
	// Both are maintained by the runtime and always have correct values,
	// so there is no need to restore old values here.
	// The g should not have changed, but m may have, if we were preempted
	// and restarted on a different thread, in which case restoring the old
	// value is incorrect and will cause serious confusion in the runtime.
	MOVM.IA.W	(R0), [R1-R8]
	MOVW	$52(R13), R0	// skip the R9/R10 slots at 44(R13) and 48(R13)
	MOVM.IA.W	(R0), [R11-R12]
	MOVW	8(R13), R0
	RET

// func udiv(n, d uint32) (q, r uint32)
// Reference:
// Sloss, Andrew et al.; ARM System Developer's Guide: Designing and Optimizing System Software
// Morgan Kaufmann; 1 edition (April 8, 2004), ISBN 978-1558608740
q = 0 // input d, output q
r = 1 // input n, output r
s = 2 // three temporary variables
M = 3
a = 11
// Please be careful when changing this, it is pretty fragile:
// 1. don't use unconditional branch as the linker is free to reorder the blocks;
// 2. if a == 11, beware that the linker will use R11 if you use certain instructions.
//
// Additional invariant: the table lookup below addresses fast_udiv_tab
// relative to PC, so the number of instructions between the label "begin"
// and the label "fast_udiv_tab" must stay exactly 36. Do not add, remove or
// expand any instruction in that range without updating the offset.
//
// In:    R(q) = divisor d, R(r) = numerator n
// Out:   R(q) = quotient,  R(r) = remainder
// Clobb: R(s), R(M), R(a), flags
// Calls runtime·panicdivide when d == 0.
TEXT udiv<>(SB),NOSPLIT,$-4
	CLZ	R(q), R(s) // find normalizing shift
	MOVW.S	R(q)<<R(s), R(a)	// normalized divisor; Z is set when d == 0
	ADD	R(a)>>25, PC, R(a) // most significant 7 bits of divisor
	MOVBU.NE	(4*36-64)(R(a)), R(a) // 36 == number of inst. between fast_udiv_tab and begin

begin:
	SUB.S	$7, R(s)	// negative result (MI) selects the large-divisor path
	RSB	$0, R(q), R(M) // M = -q
	MOVW.PL	R(a)<<R(s), R(q)

	// 1st Newton iteration
	MUL.PL	R(M), R(q), R(a) // a = -q*d
	BMI	udiv_by_large_d
	MULAWT	R(a), R(q), R(q), R(q) // q approx q-(q*q*d>>32)
	TEQ	R(M)->1, R(M) // check for d=0 or d=1

	// 2nd Newton iteration
	MUL.NE	R(M), R(q), R(a)
	MOVW.NE	$0, R(s)
	MULAL.NE	R(q), R(a), (R(q),R(s))
	BEQ	udiv_by_0_or_1

	// q now accurate enough for a remainder r, 0<=r<3*d
	MULLU	R(q), R(r), (R(q),R(s)) // q = (r * q) >> 32
	ADD	R(M), R(r), R(r) // r = n - d
	MULA	R(M), R(q), R(r), R(r) // r = n - (q+1)*d

	// since 0 <= n-q*d < 3*d; thus -d <= r < 2*d
	CMN	R(M), R(r) // t = r-d
	SUB.CS	R(M), R(r), R(r) // if (t<-d || t>=0) r=r+d
	ADD.CC	$1, R(q)
	ADD.PL	R(M)<<1, R(r)
	ADD.PL	$2, R(q)

	// return, can't use RET here or fast_udiv_tab will be dropped during linking
	MOVW	R14, R15

udiv_by_large_d:
	// at this point we know d>=2^(31-6)=2^25
	SUB	$4, R(a), R(a)
	RSB	$0, R(s), R(s)
	MOVW	R(a)>>R(s), R(q)
	MULLU	R(q), R(r), (R(q),R(s))
	MULA	R(M), R(q), R(r), R(r)

	// q now accurate enough for a remainder r, 0<=r<4*d
	CMN	R(r)>>1, R(M) // if(r/2 >= d)
	ADD.CS	R(M)<<1, R(r)
	ADD.CS	$2, R(q)
	CMN	R(r), R(M)
	ADD.CS	R(M), R(r)
	ADD.CS	$1, R(q)

	// return, can't use RET here or fast_udiv_tab will be dropped during linking
	MOVW	R14, R15

udiv_by_0_or_1:
	// carry set if d==1, carry clear if d==0
	MOVW.CS	R(r), R(q)
	MOVW.CS	$0, R(r)
	BL.CC	runtime·panicdivide(SB) // no way back

	// return, can't use RET here or fast_udiv_tab will be dropped during linking
	MOVW	R14, R15

fast_udiv_tab:
	// var tab [64]byte
	// tab[0] = 255; for i := 1; i <= 63; i++ { tab[i] = (1<<14)/(64+i) }
	// laid out here as little-endian uint32s
	WORD	$0xf4f8fcff
	WORD	$0xe6eaedf0
	WORD	$0xdadde0e3
	WORD	$0xcfd2d4d7
	WORD	$0xc5c7cacc
	WORD	$0xbcbec0c3
	WORD	$0xb4b6b8ba
	WORD	$0xacaeb0b2
	WORD	$0xa5a7a8aa
	WORD	$0x9fa0a2a3
	WORD	$0x999a9c9d
	WORD	$0x93949697
	WORD	$0x8e8f9092
	WORD	$0x898a8c8d
	WORD	$0x85868788
	WORD	$0x81828384

// The linker will pass numerator in R(TMP), and it also
// expects the result in R(TMP)
TMP = 11

// _divu: unsigned 32-bit division.
// In:  R(TMP) = numerator, 0(FP) = denominator
// Out: R(TMP) = quotient
// R(q), R(r), R(s) and R(M) are saved in the frame and restored before return.
TEXT _divu(SB), NOSPLIT, $16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(M), 16(R13)

	MOVW	R(TMP), R(r)		/* numerator */
	MOVW	0(FP), R(q) 		/* denominator */
	BL	udiv<>(SB)
	MOVW	R(q), R(TMP)
	MOVW	4(R13), R(q)
	MOVW	8(R13), R(r)
	MOVW	12(R13), R(s)
	MOVW	16(R13), R(M)
	RET

// _modu: unsigned 32-bit remainder.
// In:  R(TMP) = numerator, 0(FP) = denominator
// Out: R(TMP) = remainder
// R(q), R(r), R(s) and R(M) are saved in the frame and restored before return.
TEXT _modu(SB), NOSPLIT, $16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(M), 16(R13)

	MOVW	R(TMP), R(r)		/* numerator */
	MOVW	0(FP), R(q) 		/* denominator */
	BL	udiv<>(SB)
	MOVW	R(r), R(TMP)
	MOVW	4(R13), R(q)
	MOVW	8(R13), R(r)
	MOVW	12(R13), R(s)
	MOVW	16(R13), R(M)
	RET

// _div: signed 32-bit division built on udiv.
// In:  R(TMP) = numerator, 0(FP) = denominator
// Out: R(TMP) = quotient; negated when exactly one operand is negative
// Both exits branch to the label "out" inside _mod, which restores the
// saved registers; this relies on _div and _mod using the same frame
// size and save layout.
TEXT _div(SB),NOSPLIT,$16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(M), 16(R13)
	MOVW	R(TMP), R(r)		/* numerator */
	MOVW	0(FP), R(q) 		/* denominator */
	CMP	$0, R(r)
	BGE	d1
	RSB	$0, R(r), R(r)
	CMP	$0, R(q)
	BGE	d2
	RSB	$0, R(q), R(q)
d0:
	BL	udiv<>(SB)		/* none/both neg */
	MOVW	R(q), R(TMP)
	B	out
d1:
	CMP	$0, R(q)
	BGE	d0
	RSB	$0, R(q), R(q)
d2:
	BL	udiv<>(SB)		/* one neg */
	RSB	$0, R(q), R(TMP)
	B	out

// _mod: signed 32-bit remainder built on udiv.
// In:  R(TMP) = numerator, 0(FP) = denominator
// Out: R(TMP) = remainder; negated when the numerator is negative
// The label "out" is also the shared epilogue for _div.
TEXT _mod(SB),NOSPLIT,$16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(M), 16(R13)
	MOVW	R(TMP), R(r)		/* numerator */
	MOVW	0(FP), R(q) 		/* denominator */
	CMP	$0, R(q)
	RSB.LT	$0, R(q), R(q)		// divide by |denominator|
	CMP	$0, R(r)
	BGE	m1
	RSB	$0, R(r), R(r)
	BL	udiv<>(SB)		/* neg numerator */
	RSB	$0, R(r), R(TMP)
	B	out
m1:
	BL	udiv<>(SB)		/* pos numerator */
	MOVW	R(r), R(TMP)
out:
	MOVW	4(R13), R(q)
	MOVW	8(R13), R(r)
	MOVW	12(R13), R(s)
	MOVW	16(R13), R(M)
	RET