github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/vlop_arm.s

// Inferno's libkern/vlop-arm.s
// http://code.google.com/p/inferno-os/source/browse/libkern/vlop-arm.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
// Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

arg=0

/* replaced use of R10 by R11 because the former can be the data segment base register */

// _mulv: 64-bit multiply; stores the low 64 bits of the product
// (h0:l0)*(h1:l1) at the address passed as the first argument.
TEXT _mulv(SB), $0
	MOVW	0(FP), R0
	MOVW	4(FP), R2	/* l0 */
	MOVW	8(FP), R11	/* h0 */
	MOVW	12(FP), R4	/* l1 */
	MOVW	16(FP), R5	/* h1 */
	MULLU	R4, R2, (R7,R6)
	MUL	R11, R4, R8
	ADD	R8, R7
	MUL	R2, R5, R8
	ADD	R8, R7
	MOVW	R6, 0(R(arg))
	MOVW	R7, 4(R(arg))
	RET

// trampoline for _sfloat2. passes LR as arg0 and
// saves registers R0-R13 and CPSR on the stack. R0-R12 and CPSR flags can
// be changed by _sfloat2.
TEXT _sfloat(SB), 7, $64 // 4 arg + 14*4 saved regs + cpsr
	MOVW	R14, 4(R13)
	MOVW	R0, 8(R13)
	MOVW	$12(R13), R0
	MOVM.IA.W	[R1-R12], (R0)
	MOVW	$68(R13), R1	// correct for frame size
	MOVW	R1, 60(R13)
	WORD	$0xe10f1000	// mrs r1, cpsr
	MOVW	R1, 64(R13)
	BL	runtime·_sfloat2(SB)
	MOVW	R0, 0(R13)
	MOVW	64(R13), R1
	WORD	$0xe128f001	// msr cpsr_f, r1
	MOVW	$12(R13), R0
	MOVM.IA.W	(R0), [R1-R12]
	MOVW	8(R13), R0
	RET

// func udiv(n, d uint32) (q, r uint32)
// Reference:
// Sloss, Andrew, et al.; ARM System Developer's Guide: Designing and Optimizing System Software
// Morgan Kaufmann; 1st edition (April 8, 2004), ISBN 978-1558608740
q = 0 // input d, output q
r = 1 // input n, output r
s = 2 // three temporary variables
m = 3
a = 11
// Please be careful when changing this, it is pretty fragile:
// 1. don't use unconditional branches, as the linker is free to reorder the blocks;
// 2. if a == 11, beware that the linker will use R11 if you use certain instructions.
TEXT udiv<>(SB),7,$-4
	CLZ	R(q), R(s) // find normalizing shift
	MOVW.S	R(q)<<R(s), R(a)
	ADD	R(a)>>25, PC, R(a) // most significant 7 bits of divisor
	MOVBU.NE	(4*36-64)(R(a)), R(a) // 36 == number of inst. between fast_udiv_tab and begin
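
	// The byte loaded above is the initial reciprocal estimate: with the
	// divisor normalized so its top bit is set, tab[i] is roughly
	// (1<<14)/(64+i), indexed by the divisor's top 7 bits 64+i (see
	// fast_udiv_tab below). The Newton iterations that follow refine the
	// estimate until q is close enough for the final compare-and-adjust
	// steps to recover the exact quotient and remainder.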

begin:
	SUB.S	$7, R(s)
	RSB	$0, R(q), R(m) // m = -q
	MOVW.PL	R(a)<<R(s), R(q)

	// 1st Newton iteration
	MUL.PL	R(m), R(q), R(a) // a = -q*d
	BMI	udiv_by_large_d
	MULAWT	R(a), R(q), R(q), R(q) // q approx q-(q*q*d>>32)
	TEQ	R(m)->1, R(m) // check for d=0 or d=1

	// 2nd Newton iteration
	MUL.NE	R(m), R(q), R(a)
	MOVW.NE	$0, R(s)
	MULAL.NE	R(q), R(a), (R(q),R(s))
	BEQ	udiv_by_0_or_1

	// q now accurate enough for a remainder r, 0<=r<3*d
	MULLU	R(q), R(r), (R(q),R(s)) // q = (r * q) >> 32
	ADD	R(m), R(r), R(r) // r = n - d
	MULA	R(m), R(q), R(r), R(r) // r = n - (q+1)*d

	// since 0 <= n-q*d < 3*d; thus -d <= r < 2*d
	CMN	R(m), R(r) // t = r-d
	SUB.CS	R(m), R(r), R(r) // if (t<-d || t>=0) r=r+d
	ADD.CC	$1, R(q)
	ADD.PL	R(m)<<1, R(r)
	ADD.PL	$2, R(q)

	// return, can't use RET here or fast_udiv_tab will be dropped during linking
	MOVW	R14, R15

udiv_by_large_d:
	// at this point we know d>=2^(31-6)=2^25
	SUB	$4, R(a), R(a)
	RSB	$0, R(s), R(s)
	MOVW	R(a)>>R(s), R(q)
	MULLU	R(q), R(r), (R(q),R(s))
	MULA	R(m), R(q), R(r), R(r)

	// q now accurate enough for a remainder r, 0<=r<4*d
	CMN	R(r)>>1, R(m) // if(r/2 >= d)
	ADD.CS	R(m)<<1, R(r)
	ADD.CS	$2, R(q)
	CMN	R(r), R(m)
	ADD.CS	R(m), R(r)
	ADD.CS	$1, R(q)

	// return, can't use RET here or fast_udiv_tab will be dropped during linking
	MOVW	R14, R15

udiv_by_0_or_1:
	// carry set if d==1, carry clear if d==0
	MOVW.CS	R(r), R(q)
	MOVW.CS	$0, R(r)
	BL.CC	runtime·panicdivide(SB) // no way back

	// return, can't use RET here or fast_udiv_tab will be dropped during linking
	MOVW	R14, R15

fast_udiv_tab:
	// var tab [64]byte
	// tab[0] = 255; for i := 1; i <= 63; i++ { tab[i] = (1<<14)/(64+i) }
	// laid out here as little-endian uint32s
	WORD	$0xf4f8fcff
	WORD	$0xe6eaedf0
	WORD	$0xdadde0e3
	WORD	$0xcfd2d4d7
	WORD	$0xc5c7cacc
	WORD	$0xbcbec0c3
	WORD	$0xb4b6b8ba
	WORD	$0xacaeb0b2
	WORD	$0xa5a7a8aa
	WORD	$0x9fa0a2a3
	WORD	$0x999a9c9d
	WORD	$0x93949697
	WORD	$0x8e8f9092
	WORD	$0x898a8c8d
	WORD	$0x85868788
	WORD	$0x81828384

// The linker will pass the numerator in R(TMP), and it also
// expects the result in R(TMP).
TMP = 11

TEXT _divu(SB), 7, $16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(m), 16(R13)

	MOVW	R(TMP), R(r)	/* numerator */
	MOVW	0(FP), R(q)	/* denominator */
	BL	udiv<>(SB)
	MOVW	R(q), R(TMP)
	MOVW	4(R13), R(q)
	MOVW	8(R13), R(r)
	MOVW	12(R13), R(s)
	MOVW	16(R13), R(m)
	RET

TEXT _modu(SB), 7, $16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(m), 16(R13)

	MOVW	R(TMP), R(r)	/* numerator */
	MOVW	0(FP), R(q)	/* denominator */
	BL	udiv<>(SB)
	MOVW	R(r), R(TMP)
	MOVW	4(R13), R(q)
	MOVW	8(R13), R(r)
	MOVW	12(R13), R(s)
	MOVW	16(R13), R(m)
	RET

TEXT _div(SB),7,$16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(m), 16(R13)
	MOVW	R(TMP), R(r)	/* numerator */
	MOVW	0(FP), R(q)	/* denominator */
	CMP	$0, R(r)
	BGE	d1
	RSB	$0, R(r), R(r)
	CMP	$0, R(q)
	BGE	d2
	RSB	$0, R(q), R(q)
d0:
	BL	udiv<>(SB)	/* none/both neg */
	MOVW	R(q), R(TMP)
	B	out
d1:
	CMP	$0, R(q)
	BGE	d0
	RSB	$0, R(q), R(q)
d2:
	BL	udiv<>(SB)	/* one neg */
	RSB	$0, R(q), R(TMP)
	B	out

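// _mod: the remainder takes the sign of the numerator (as with Go's %
// operator); the denominator's sign is ignored. _div above negates the
// quotient only when exactly one operand is negative.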
TEXT _mod(SB),7,$16
	MOVW	R(q), 4(R13)
	MOVW	R(r), 8(R13)
	MOVW	R(s), 12(R13)
	MOVW	R(m), 16(R13)
	MOVW	R(TMP), R(r)	/* numerator */
	MOVW	0(FP), R(q)	/* denominator */
	CMP	$0, R(q)
	RSB.LT	$0, R(q), R(q)
	CMP	$0, R(r)
	BGE	m1
	RSB	$0, R(r), R(r)
	BL	udiv<>(SB)	/* neg numerator */
	RSB	$0, R(r), R(TMP)
	B	out
m1:
	BL	udiv<>(SB)	/* pos numerator */
	MOVW	R(r), R(TMP)
out:
	MOVW	4(R13), R(q)
	MOVW	8(R13), R(r)
	MOVW	12(R13), R(s)
	MOVW	16(R13), R(m)
	RET
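
// For reference, the fast_udiv_tab words above can be reproduced with a
// short Go sketch along the following lines (illustration only, not part
// of the runtime; the packing matches the little-endian WORD layout noted
// in the table's comment):
//
//	package main
//
//	import "fmt"
//
//	func main() {
//		var tab [64]byte
//		tab[0] = 255
//		for i := 1; i <= 63; i++ {
//			tab[i] = byte((1 << 14) / (64 + i))
//		}
//		// pack four table bytes per word, little-endian
//		for i := 0; i < 64; i += 4 {
//			w := uint32(tab[i]) | uint32(tab[i+1])<<8 |
//				uint32(tab[i+2])<<16 | uint32(tab[i+3])<<24
//			fmt.Printf("WORD $%#08x\n", w)
//		}
//	}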