// Inferno's libkern/vlop-arm.s
// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/vlop-arm.s
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
// Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// func runtime·udiv(n, d uint32) (q, r uint32)
// compiler knows the register usage of this function
// Reference:
// Sloss, Andrew et al.; ARM System Developer's Guide: Designing and Optimizing System Software
// Morgan Kaufmann; 1st edition (April 8, 2004), ISBN 978-1558608740
#define Rq	R0 // input d, output q
#define Rr	R1 // input n, output r
#define Rs	R2 // three temporary variables
#define RM	R3
#define Ra	R11

// udiv divides the unsigned value in Rr (n) by the unsigned value in Rq (d),
// leaving the quotient in Rq and the remainder in Rr. Rs, RM and Ra are
// used as scratch. When the CPU advertises a hardware divider the DIVUHW
// path is taken; otherwise a table-seeded Newton iteration computes a
// reciprocal estimate that is then corrected. A zero divisor tail-jumps to
// runtime·panicdivide.
//
// Be careful: Ra == R11 will be used by the linker for synthesized instructions.
// Note: this function does not have a frame. If it ever needs a frame,
// the RET instruction will clobber R12 on nacl, and the compiler's register
// allocator needs to know.
TEXT runtime·udiv(SB),NOSPLIT|NOFRAME,$0
	MOVBU	internal∕cpu·ARM+const_offsetARMHasIDIVA(SB), Ra
	CMP	$0, Ra
	BNE	udiv_hardware

	CLZ	Rq, Rs // find normalizing shift
	MOVW.S	Rq<<Rs, Ra
	// The normalized divisor has its top bit set, so its top 7 bits lie in
	// 64..127; bias the table base by -64 to index the 64-entry table.
	MOVW	$fast_udiv_tab<>-64(SB), RM
	ADD.NE	Ra>>25, RM, Ra // index by most significant 7 bits of divisor
	MOVBU.NE	(Ra), Ra

	SUB.S	$7, Rs
	RSB	$0, Rq, RM // M = -d (Rq still holds the divisor here)
	MOVW.PL	Ra<<Rs, Rq

	// 1st Newton iteration
	MUL.PL	RM, Rq, Ra // a = -q*d
	BMI	udiv_by_large_d
	MULAWT	Ra, Rq, Rq, Rq // q approx q-(q*q*d>>32)
	TEQ	RM->1, RM // check for d=0 or d=1

	// 2nd Newton iteration
	MUL.NE	RM, Rq, Ra
	MOVW.NE	$0, Rs
	MULAL.NE	Rq, Ra, (Rq,Rs)
	BEQ	udiv_by_0_or_1

	// q now accurate enough for a remainder r, 0<=r<3*d
	MULLU	Rq, Rr, (Rq,Rs) // q = (r * q) >> 32
	ADD	RM, Rr, Rr // r = n - d
	MULA	RM, Rq, Rr, Rr // r = n - (q+1)*d

	// since 0 <= n-q*d < 3*d; thus -d <= r < 2*d
	CMN	RM, Rr // t = r-d
	SUB.CS	RM, Rr, Rr // if (t<-d || t>=0) r=r+d
	ADD.CC	$1, Rq
	ADD.PL	RM<<1, Rr
	ADD.PL	$2, Rq
	RET

// use hardware divider
udiv_hardware:
	DIVUHW	Rq, Rr, Rs // Rs = n / d
	MUL	Rs, Rq, RM // RM = q * d
	RSB	Rr, RM, Rr // r = n - q*d
	MOVW	Rs, Rq // move quotient to its output register
	RET

udiv_by_large_d:
	// at this point we know d>=2^(31-6)=2^25
	SUB	$4, Ra, Ra
	RSB	$0, Rs, Rs
	MOVW	Ra>>Rs, Rq
	MULLU	Rq, Rr, (Rq,Rs)
	MULA	RM, Rq, Rr, Rr

	// q now accurate enough for a remainder r, 0<=r<4*d
	CMN	Rr>>1, RM // if(r/2 >= d)
	ADD.CS	RM<<1, Rr
	ADD.CS	$2, Rq
	CMN	Rr, RM
	ADD.CS	RM, Rr
	ADD.CS	$1, Rq
	RET

udiv_by_0_or_1:
	// carry set if d==1, carry clear if d==0
	BCC	udiv_by_0
	// d == 1: quotient is n, remainder is 0
	MOVW	Rr, Rq
	MOVW	$0, Rr
	RET

udiv_by_0:
	// d == 0: jump (not call) to the divide panic routine
	MOVW	$runtime·panicdivide(SB), R11
	B	(R11)

// var tab [64]byte
// tab[0] = 255; for i := 1; i <= 63; i++ { tab[i] = (1<<14)/(64+i) }
// laid out here as little-endian uint32s
DATA fast_udiv_tab<>+0x00(SB)/4, $0xf4f8fcff
DATA fast_udiv_tab<>+0x04(SB)/4, $0xe6eaedf0
DATA fast_udiv_tab<>+0x08(SB)/4, $0xdadde0e3
DATA fast_udiv_tab<>+0x0c(SB)/4, $0xcfd2d4d7
DATA fast_udiv_tab<>+0x10(SB)/4, $0xc5c7cacc
DATA fast_udiv_tab<>+0x14(SB)/4, $0xbcbec0c3
DATA fast_udiv_tab<>+0x18(SB)/4, $0xb4b6b8ba
DATA fast_udiv_tab<>+0x1c(SB)/4, $0xacaeb0b2
DATA fast_udiv_tab<>+0x20(SB)/4, $0xa5a7a8aa
DATA fast_udiv_tab<>+0x24(SB)/4, $0x9fa0a2a3
DATA fast_udiv_tab<>+0x28(SB)/4, $0x999a9c9d
DATA fast_udiv_tab<>+0x2c(SB)/4, $0x93949697
DATA fast_udiv_tab<>+0x30(SB)/4, $0x8e8f9092
DATA fast_udiv_tab<>+0x34(SB)/4, $0x898a8c8d
DATA fast_udiv_tab<>+0x38(SB)/4, $0x85868788
DATA fast_udiv_tab<>+0x3c(SB)/4, $0x81828384
GLOBL fast_udiv_tab<>(SB), RODATA, $64

// The linker will pass numerator in R8
#define Rn R8
// The linker expects the result in RTMP
#define RTMP R11

// _divu computes the unsigned quotient Rn / m.divmod and returns it in RTMP.
// Rq, Rr, Rs and RM are saved in the 16-byte frame and restored before
// returning, so only RTMP is changed from the caller's point of view.
TEXT runtime·_divu(SB), NOSPLIT, $16-0
	// It's not strictly true that there are no local pointers.
	// It could be that the saved registers Rq, Rr, Rs, and RM
	// contain pointers. However, the only way this can matter
	// is if the stack grows (which it can't, udiv is nosplit)
	// or if a fault happens and more frames are added to
	// the stack due to deferred functions.
	// In the latter case, the stack can grow arbitrarily,
	// and garbage collection can happen, and those
	// operations care about pointers, but in that case
	// the calling frame is dead, and so are the saved
	// registers. So we can claim there are no pointers here.
	NO_LOCAL_POINTERS
	MOVW	Rq, 4(R13)
	MOVW	Rr, 8(R13)
	MOVW	Rs, 12(R13)
	MOVW	RM, 16(R13)

	MOVW	Rn, Rr			/* numerator */
	MOVW	g_m(g), Rq
	MOVW	m_divmod(Rq), Rq	/* denominator */
	BL	runtime·udiv(SB)
	MOVW	Rq, RTMP		/* result: quotient */
	MOVW	4(R13), Rq
	MOVW	8(R13), Rr
	MOVW	12(R13), Rs
	MOVW	16(R13), RM
	RET

// _modu computes the unsigned remainder Rn % m.divmod and returns it in RTMP.
// Register save/restore is the same as in _divu.
TEXT runtime·_modu(SB), NOSPLIT, $16-0
	NO_LOCAL_POINTERS
	MOVW	Rq, 4(R13)
	MOVW	Rr, 8(R13)
	MOVW	Rs, 12(R13)
	MOVW	RM, 16(R13)

	MOVW	Rn, Rr			/* numerator */
	MOVW	g_m(g), Rq
	MOVW	m_divmod(Rq), Rq	/* denominator */
	BL	runtime·udiv(SB)
	MOVW	Rr, RTMP		/* result: remainder */
	MOVW	4(R13), Rq
	MOVW	8(R13), Rr
	MOVW	12(R13), Rs
	MOVW	16(R13), RM
	RET

// _div computes the signed quotient Rn / m.divmod and returns it in RTMP.
// Both operands are made non-negative, udiv is called on the magnitudes,
// and the quotient is negated when exactly one operand was negative.
TEXT runtime·_div(SB),NOSPLIT,$16-0
	NO_LOCAL_POINTERS
	MOVW	Rq, 4(R13)
	MOVW	Rr, 8(R13)
	MOVW	Rs, 12(R13)
	MOVW	RM, 16(R13)
	MOVW	Rn, Rr			/* numerator */
	MOVW	g_m(g), Rq
	MOVW	m_divmod(Rq), Rq	/* denominator */
	CMP	$0, Rr
	BGE	d1
	RSB	$0, Rr, Rr		/* numerator was negative: negate it */
	CMP	$0, Rq
	BGE	d2
	RSB	$0, Rq, Rq		/* denominator was negative too: negate it */
d0:
	BL	runtime·udiv(SB)	/* none/both neg */
	MOVW	Rq, RTMP
	B	out1
d1:
	CMP	$0, Rq
	BGE	d0
	RSB	$0, Rq, Rq		/* only the denominator was negative */
d2:
	BL	runtime·udiv(SB)	/* one neg */
	RSB	$0, Rq, RTMP		/* negate the quotient */
out1:
	MOVW	4(R13), Rq
	MOVW	8(R13), Rr
	MOVW	12(R13), Rs
	MOVW	16(R13), RM
	RET

// _mod computes the signed remainder Rn % m.divmod and returns it in RTMP.
// The denominator's sign is discarded; the remainder is negated when the
// numerator was negative.
TEXT runtime·_mod(SB),NOSPLIT,$16-0
	NO_LOCAL_POINTERS
	MOVW	Rq, 4(R13)
	MOVW	Rr, 8(R13)
	MOVW	Rs, 12(R13)
	MOVW	RM, 16(R13)
	MOVW	Rn, Rr			/* numerator */
	MOVW	g_m(g), Rq
	MOVW	m_divmod(Rq), Rq	/* denominator */
	CMP	$0, Rq
	RSB.LT	$0, Rq, Rq		/* use |denominator| */
	CMP	$0, Rr
	BGE	m1
	RSB	$0, Rr, Rr
	BL	runtime·udiv(SB)	/* neg numerator */
	RSB	$0, Rr, RTMP		/* negate the remainder */
	B	out
m1:
	BL	runtime·udiv(SB)	/* pos numerator */
	MOVW	Rr, RTMP
out:
	MOVW	4(R13), Rq
	MOVW	8(R13), Rr
	MOVW	12(R13), Rs
	MOVW	16(R13), RM
	RET

// _mul64by32 and _div64by32 not implemented on arm
// Each one loads from address 0 so that any call faults immediately.
TEXT runtime·_mul64by32(SB), NOSPLIT, $0
	MOVW	$0, R0
	MOVW	(R0), R1 // crash

TEXT runtime·_div64by32(SB), NOSPLIT, $0
	MOVW	$0, R0
	MOVW	(R0), R1 // crash