// Listing source: github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/vlop_arm.s
//
// Inferno's libkern/vlop-arm.s
// http://code.google.com/p/inferno-os/source/browse/libkern/vlop-arm.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
// Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "zasm_GOOS_GOARCH.h"
#include "../../cmd/ld/textflag.h"

arg=0

/* replaced use of R10 by R11 because the former can be the data segment base register */

// _mulv computes the low 64 bits of the product of two 64-bit values.
// Arguments, as read from the frame below:
//	0(FP)		address of the 64-bit result, loaded into R0 (== R(arg))
//	4(FP), 8(FP)	low and high words of the first operand (l0, h0)
//	12(FP), 16(FP)	low and high words of the second operand (l1, h1)
// The result is stored through R(arg): low word at offset 0, high word at 4.
TEXT _mulv(SB), NOSPLIT, $0
	MOVW	0(FP), R0
	MOVW	4(FP), R2	/* l0 */
	MOVW	8(FP), R11	/* h0 */
	MOVW	12(FP), R4	/* l1 */
	MOVW	16(FP), R5	/* h1 */
	MULLU	R4, R2, (R7,R6)	// R7:R6 = l1*l0 (R6 is stored as the low word)
	MUL	R11, R4, R8	// cross term h0*l1, low 32 bits only
	ADD	R8, R7		// ... accumulated into the high word
	MUL	R2, R5, R8	// cross term l0*h1, low 32 bits only
	ADD	R8, R7
	MOVW	R6, 0(R(arg))
	MOVW	R7, 4(R(arg))
	RET

// trampoline for _sfloat2. passes LR as arg0 and
// saves registers R0-R13 and CPSR on the stack. R0-R12 and CPSR flags can
// be changed by _sfloat2.
//
// Frame layout established by the stores below (offsets from R13):
//	 4	R14 (LR), which is also arg0 of _sfloat2
//	 8	R0
//	12-56	R1-R12 (R9 lands at 44, R10 at 48, R11 at 52, R12 at 56)
//	60	R13+68, i.e. R13 corrected for the frame size
//	64	CPSR
TEXT _sfloat(SB), NOSPLIT, $64-0 // 4 arg + 14*4 saved regs + cpsr
	MOVW	R14, 4(R13)
	MOVW	R0, 8(R13)
	MOVW	$12(R13), R0
	MOVM.IA.W	[R1-R12], (R0)
	MOVW	$68(R13), R1 // correct for frame size
	MOVW	R1, 60(R13)
	WORD	$0xe10f1000 // mrs r1, cpsr
	MOVW	R1, 64(R13)
	// Disable preemption of this goroutine during _sfloat2 by
	// m->locks++ and m->locks-- around the call.
	// Rescheduling this goroutine may cause the loss of the
	// contents of the software floating point registers in
	// m->freghi, m->freglo, m->fflag, if the goroutine is moved
	// to a different m or another goroutine runs on this m.
	// Rescheduling at ordinary function calls is okay because
	// all registers are caller save, but _sfloat2 and the things
	// that it runs are simulating the execution of individual
	// program instructions, and those instructions do not expect
	// the floating point registers to be lost.
	// An alternative would be to move the software floating point
	// registers into G, but they do not need to be kept at the
	// usual places a goroutine reschedules (at function calls),
	// so it would be a waste of 132 bytes per G.
	MOVW	m_locks(m), R1
	ADD	$1, R1
	MOVW	R1, m_locks(m)
	BL	runtime·_sfloat2(SB)
	MOVW	m_locks(m), R1
	SUB	$1, R1
	MOVW	R1, m_locks(m)
	// NOTE(review): R0 here is whatever _sfloat2 left in it; it is written
	// to 0(R13), presumably the saved-LR slot so that RET resumes at the
	// address _sfloat2 returned. Confirm against _sfloat2.
	MOVW	R0, 0(R13)
	MOVW	64(R13), R1
	WORD	$0xe128f001	// msr cpsr_f, r1
	MOVW	$12(R13), R0
	// Restore R1-R8 and R11-R12, but ignore the saved R9 (m) and R10 (g).
	// Both are maintained by the runtime and always have correct values,
	// so there is no need to restore old values here.
	// The g should not have changed, but m may have, if we were preempted
	// and restarted on a different thread, in which case restoring the old
	// value is incorrect and will cause serious confusion in the runtime.
	MOVM.IA.W	(R0), [R1-R8]
	MOVW	$52(R13), R0	// skip the R9 and R10 slots at 44 and 48
	MOVM.IA.W	(R0), [R11-R12]
	MOVW	8(R13), R0
	RET

// func udiv(n, d uint32) (q, r uint32)
// Reference:
// Sloss, Andrew et al.; ARM System Developer's Guide: Designing and Optimizing System Software
// Morgan Kaufmann; 1 edition (April 8, 2004), ISBN 978-1558608740
q = 0 // input d, output q
r = 1 // input n, output r
s = 2 // three temporary variables
M = 3
a = 11
// Be careful: R(a) == R11 will be used by the linker for synthesized instructions.
// udiv<> is the unsigned 32-bit divide core shared by _divu, _modu, _div
// and _mod below.
//	In:	R(q) = divisor d, R(r) = numerator n
//	Out:	R(q) = quotient, R(r) = remainder
//	Uses:	R(s), R(M), R(a) and the condition flags as scratch
// It does not branch back on d == 0; that path unwinds the caller's frame
// and jumps to runtime·panicdivide (see udiv_by_0).
// The instruction order matters: several instructions are predicated on
// flags set a few instructions earlier (.S / CMN / TEQ).
TEXT udiv<>(SB),NOSPLIT,$-4
	CLZ	R(q), R(s) // find normalizing shift
	MOVW.S	R(q)<<R(s), R(a)
	// For d != 0 the normalized divisor has bit 31 set, so R(a)>>25 lies in
	// [64,127]; biasing the table base by -64 maps that onto tab[0..63].
	MOVW	$fast_udiv_tab<>-64(SB), R(M)
	MOVBU.NE	R(a)>>25(R(M)), R(a) // index by most significant 7 bits of divisor

	SUB.S	$7, R(s)
	RSB	$0, R(q), R(M) // M = -q
	MOVW.PL	R(a)<<R(s), R(q)

	// 1st Newton iteration
	MUL.PL	R(M), R(q), R(a) // a = -q*d
	BMI	udiv_by_large_d
	MULAWT	R(a), R(q), R(q), R(q) // q approx q-(q*q*d>>32)
	TEQ	R(M)->1, R(M) // check for d=0 or d=1

	// 2nd Newton iteration
	MUL.NE	R(M), R(q), R(a)
	MOVW.NE	$0, R(s)
	MULAL.NE	R(q), R(a), (R(q),R(s))
	BEQ	udiv_by_0_or_1

	// q now accurate enough for a remainder r, 0<=r<3*d
	MULLU	R(q), R(r), (R(q),R(s)) // q = (r * q) >> 32
	ADD	R(M), R(r), R(r) // r = n - d
	MULA	R(M), R(q), R(r), R(r) // r = n - (q+1)*d

	// since 0 <= n-q*d < 3*d; thus -d <= r < 2*d
	CMN	R(M), R(r) // t = r-d
	SUB.CS	R(M), R(r), R(r) // if (t<-d || t>=0) r=r+d
	ADD.CC	$1, R(q)
	ADD.PL	R(M)<<1, R(r)
	ADD.PL	$2, R(q)
	RET

udiv_by_large_d:
	// at this point we know d>=2^(31-6)=2^25
	SUB	$4, R(a), R(a)
	RSB	$0, R(s), R(s)
	MOVW	R(a)>>R(s), R(q)
	MULLU	R(q), R(r), (R(q),R(s))
	MULA	R(M), R(q), R(r), R(r)

	// q now accurate enough for a remainder r, 0<=r<4*d
	CMN	R(r)>>1, R(M) // if(r/2 >= d)
	ADD.CS	R(M)<<1, R(r)
	ADD.CS	$2, R(q)
	CMN	R(r), R(M)
	ADD.CS	R(M), R(r)
	ADD.CS	$1, R(q)
	RET

udiv_by_0_or_1:
	// carry set if d==1, carry clear if d==0
	BCC	udiv_by_0
	// d == 1: quotient is n, remainder is 0
	MOVW	R(r), R(q)
	MOVW	$0, R(r)
	RET

udiv_by_0:
	// The ARM toolchain expects it can emit references to DIV and MOD
	// instructions. The linker rewrites each pseudo-instruction into
	// a sequence that pushes two values onto the stack and then calls
	// _divu, _modu, _div, or _mod (below), all of which have a 16-byte
	// frame plus the saved LR. The traceback routine knows the expanded
	// stack frame size at the pseudo-instruction call site, but it
	// doesn't know that the frame has a non-standard layout. In particular,
	// it expects to find a saved LR in the bottom word of the frame.
	// Unwind the stack back to the pseudo-instruction call site, copy the
	// saved LR where the traceback routine will look for it, and make it
	// appear that panicdivide was called from that PC.
	MOVW	0(R13), LR
	ADD	$20, R13	// 16-byte frame + saved LR of the _div*/_mod* helper
	MOVW	8(R13), R1 // actual saved LR
	MOVW	R1, 0(R13) // expected here for traceback
	B	runtime·panicdivide(SB)

// fast_udiv_tab<> holds the 64-byte reciprocal-estimate table that udiv<>
// indexes with the top 7 bits of the normalized divisor. It is emitted as
// data words inside a TEXT symbol and is only ever read, never called,
// within this file.
TEXT fast_udiv_tab<>(SB),NOSPLIT,$-4
	// var tab [64]byte
	// tab[0] = 255; for i := 1; i <= 63; i++ { tab[i] = (1<<14)/(64+i) }
	// laid out here as little-endian uint32s
	WORD	$0xf4f8fcff
	WORD	$0xe6eaedf0
	WORD	$0xdadde0e3
	WORD	$0xcfd2d4d7
	WORD	$0xc5c7cacc
	WORD	$0xbcbec0c3
	WORD	$0xb4b6b8ba
	WORD	$0xacaeb0b2
	WORD	$0xa5a7a8aa
	WORD	$0x9fa0a2a3
	WORD	$0x999a9c9d
	WORD	$0x93949697
	WORD	$0x8e8f9092
	WORD	$0x898a8c8d
	WORD	$0x85868788
	WORD	$0x81828384

// The linker will pass numerator in R(TMP), and it also
// expects the result in R(TMP)
TMP = 11

// _divu: unsigned quotient.
// In: R(TMP) = numerator, 0(FP) = denominator. Out: R(TMP) = quotient.
// R(q), R(r), R(s) and R(M) are saved in the 16-byte frame and restored, so
// only R(TMP) (the same register as udiv's scratch R(a)) changes.
TEXT _divu(SB), NOSPLIT, $16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(M), 16(R13)

	MOVW	R(TMP), R(r)		/* numerator */
	MOVW	0(FP), R(q) 		/* denominator */
	BL  	udiv<>(SB)
	MOVW	R(q), R(TMP)
	MOVW	4(R13), R(q)
	MOVW	8(R13), R(r)
	MOVW	12(R13), R(s)
	MOVW	16(R13), R(M)
	RET

// _modu: unsigned remainder.
// Same register protocol as _divu, but R(TMP) receives udiv's remainder.
TEXT _modu(SB), NOSPLIT, $16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(M), 16(R13)

	MOVW	R(TMP), R(r)		/* numerator */
	MOVW	0(FP), R(q) 		/* denominator */
	BL  	udiv<>(SB)
	MOVW	R(r), R(TMP)
	MOVW	4(R13), R(q)
	MOVW	8(R13), R(r)
	MOVW	12(R13), R(s)
	MOVW	16(R13), R(M)
	RET

// _div: signed quotient.
// Both operands are made non-negative, udiv<> is called, and the quotient
// is negated when exactly one operand was negative. The register
// save/restore protocol matches _divu; the restore is done by the shared
// `out` epilogue, which lives at the end of _mod.
TEXT _div(SB),NOSPLIT,$16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(M), 16(R13)
	MOVW	R(TMP), R(r)		/* numerator */
	MOVW	0(FP), R(q) 		/* denominator */
	CMP 	$0, R(r)
	BGE 	d1
	RSB 	$0, R(r), R(r)
	CMP 	$0, R(q)
	BGE 	d2
	RSB 	$0, R(q), R(q)
d0:
	BL  	udiv<>(SB)  		/* none/both neg */
	MOVW	R(q), R(TMP)
	B		out
d1:
	CMP 	$0, R(q)
	BGE 	d0
	RSB 	$0, R(q), R(q)
d2:
	BL  	udiv<>(SB)  		/* one neg */
	RSB		$0, R(q), R(TMP)
	B   	out

// _mod: signed remainder.
// The denominator's sign is discarded; the result takes the sign of the
// numerator. The `out` label at the end is the register-restore epilogue,
// which _div also branches to.
TEXT _mod(SB),NOSPLIT,$16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(M), 16(R13)
	MOVW	R(TMP), R(r)		/* numerator */
	MOVW	0(FP), R(q) 		/* denominator */
	CMP 	$0, R(q)
	RSB.LT	$0, R(q), R(q)
	CMP 	$0, R(r)
	BGE 	m1
	RSB 	$0, R(r), R(r)
	BL  	udiv<>(SB)  		/* neg numerator */
	RSB 	$0, R(r), R(TMP)
	B   	out
m1:
	BL  	udiv<>(SB)  		/* pos numerator */
	MOVW	R(r), R(TMP)
out:
	MOVW	4(R13), R(q)
	MOVW	8(R13), R(r)
	MOVW	12(R13), R(s)
	MOVW	16(R13), R(M)
	RET