github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/vlop_arm.s (about)

     1  // Inferno's libkern/vlop-arm.s
     2  // http://code.google.com/p/inferno-os/source/browse/libkern/vlop-arm.s
     3  //
     4  //         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6  //         Portions Copyright 2009 The Go Authors. All rights reserved.
     7  //
     8  // Permission is hereby granted, free of charge, to any person obtaining a copy
     9  // of this software and associated documentation files (the "Software"), to deal
    10  // in the Software without restriction, including without limitation the rights
    11  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12  // copies of the Software, and to permit persons to whom the Software is
    13  // furnished to do so, subject to the following conditions:
    14  //
    15  // The above copyright notice and this permission notice shall be included in
    16  // all copies or substantial portions of the Software.
    17  //
    18  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24  // THE SOFTWARE.
    25  
    26  #include "zasm_GOOS_GOARCH.h"
    27  #include "../../cmd/ld/textflag.h"
    28  
    29  arg=0
    30  
    31  /* R11 stands in for R10 here because R10 can be the data segment base register */
    32  
    33  TEXT _mulv(SB), NOSPLIT, $0
        	// Stores the low 64 bits of (h0:l0)*(h1:l1) through the pointer
        	// found at 0(FP): low word at offset 0, high word at offset 4.
        	// Uses R0, R2, R4-R8 and R11 as scratch.
    34  	MOVW	0(FP), R(arg)	// result pointer
    35  	MOVW	12(FP), R4	// l1
    36  	MOVW	4(FP), R2	// l0
    37  	MULLU	R4, R2, (R7,R6)	// R7:R6 = l0*l1 (unsigned 32x32 -> 64)
    38  	MOVW	8(FP), R11	// h0
    39  	MUL	R11, R4, R8	// cross term h0*l1, low 32 bits only
    40  	ADD	R8, R7
    41  	MOVW	16(FP), R5	// h1
    42  	MUL	R2, R5, R8	// cross term l0*h1, low 32 bits only
    43  	ADD	R8, R7
    44  	MOVW	R7, 4(R(arg))	// high word
    45  	MOVW	R6, 0(R(arg))	// low word
    46  	RET
    47  
    48  // trampoline for _sfloat2. passes LR as arg0 and
    49  // saves registers R0-R13 and CPSR on the stack. R0-R12 and CPSR flags can
    50  // be changed by _sfloat2.
        //
        // Frame layout as written by the code below (offsets from R13):
        //     4      LR on entry (the arg0 slot for _sfloat2)
        //     8      R0
        //    12-56   R1-R12
        //    60      R13+68 (see "correct for frame size" below)
        //    64      CPSR
        // After the call, R0 (the value left there by _sfloat2) is stored to
        // 0(R13) before RET; presumably that is the slot RET reloads the
        // return address from, so execution resumes at the PC chosen by
        // _sfloat2 -- TODO confirm against the 5l prologue/epilogue.
    51  TEXT _sfloat(SB), NOSPLIT, $64-0 // 4 arg + 14*4 saved regs + cpsr
    52  	MOVW	R14, 4(R13) // LR -> arg0
    53  	MOVW	R0, 8(R13) // R0 is saved by hand because it is needed as the store pointer next
    54  	MOVW	$12(R13), R0
    55  	MOVM.IA.W	[R1-R12], (R0) // store-multiple R1-R12 into 12(R13)..56(R13)
    56  	MOVW	$68(R13), R1 // correct for frame size
    57  	MOVW	R1, 60(R13)
    58  	WORD	$0xe10f1000 // mrs r1, cpsr
    59  	MOVW	R1, 64(R13)
    60  	// Disable preemption of this goroutine during _sfloat2 by
    61  	// m->locks++ and m->locks-- around the call.
    62  	// Rescheduling this goroutine may cause the loss of the
    63  	// contents of the software floating point registers in 
    64  	// m->freghi, m->freglo, m->fflag, if the goroutine is moved
    65  	// to a different m or another goroutine runs on this m.
    66  	// Rescheduling at ordinary function calls is okay because
    67  	// all registers are caller save, but _sfloat2 and the things
    68  	// that it runs are simulating the execution of individual
    69  	// program instructions, and those instructions do not expect
    70  	// the floating point registers to be lost.
    71  	// An alternative would be to move the software floating point
    72  	// registers into G, but they do not need to be kept at the 
    73  	// usual places a goroutine reschedules (at function calls),
    74  	// so it would be a waste of 132 bytes per G.
    75  	MOVW	m_locks(m), R1
    76  	ADD	$1, R1
    77  	MOVW	R1, m_locks(m)
    78  	BL	runtime·_sfloat2(SB)
    79  	MOVW	m_locks(m), R1
    80  	SUB	$1, R1
    81  	MOVW	R1, m_locks(m)
    82  	MOVW	R0, 0(R13) // value returned by _sfloat2 goes into the bottom word of the frame
    83  	MOVW	64(R13), R1 // saved (possibly updated) CPSR image
    84  	WORD	$0xe128f001	// msr cpsr_f, r1
    85  	MOVW	$12(R13), R0
    86  	// Restore R1-R8 and R11-R12, but ignore the saved R9 (m) and R10 (g).
    87  	// Both are maintained by the runtime and always have correct values,
    88  	// so there is no need to restore old values here.
    89  	// The g should not have changed, but m may have, if we were preempted
    90  	// and restarted on a different thread, in which case restoring the old
    91  	// value is incorrect and will cause serious confusion in the runtime.
    92  	MOVM.IA.W	(R0), [R1-R8]
    93  	MOVW	$52(R13), R0 // 52 = 12 + 10*4: the slot holding R11, skipping R9 and R10
    94  	MOVM.IA.W	(R0), [R11-R12]
    95  	MOVW	8(R13), R0 // R0 last, since it served as the load pointer above
    96  	RET
    97  
    98  // func udiv(n, d uint32) (q, r uint32)
        //
        // Register interface (not a stack-based Go call):
        //   in:  R(q) = divisor d, R(r) = dividend n
        //   out: R(q) = quotient,  R(r) = remainder
        // R(s), R(M), R(a) and the condition flags are used as scratch and are
        // not preserved here; the callers in this file save R(q), R(r), R(s)
        // and R(M) themselves.
        // A zero divisor ends in runtime·panicdivide (see udiv_by_0).
        //
    99  // Reference:
   100  // Sloss, Andrew et al.; ARM System Developer's Guide: Designing and Optimizing System Software
   101  // Morgan Kaufmann; 1 edition (April 8, 2004), ISBN 978-1558608740
   102  q = 0 // input d, output q
   103  r = 1 // input n, output r
   104  s = 2 // three temporary variables
   105  M = 3
   106  a = 11
   107  // Be careful: R(a) == R11 will be used by the linker for synthesized instructions.
   108  TEXT udiv<>(SB),NOSPLIT,$-4
   109  	CLZ 	R(q), R(s) // find normalizing shift
   110  	MOVW.S	R(q)<<R(s), R(a) // a = d shifted so its top bit is set; sets Z when d == 0
   111  	MOVW	$fast_udiv_tab<>-64(SB), R(M) // table base biased by -64 to match the index range used below
   112  	MOVBU.NE	R(a)>>25(R(M)), R(a) // index by most significant 7 bits of divisor
   113  
   114  	SUB.S	$7, R(s) // flags set here drive the .PL instructions and the BMI below
   115  	RSB 	$0, R(q), R(M) // M = -q
   116  	MOVW.PL	R(a)<<R(s), R(q)
   117  
   118  	// 1st Newton iteration
   119  	MUL.PL	R(M), R(q), R(a) // a = -q*d
   120  	BMI 	udiv_by_large_d // taken when the SUB.S result above was negative
   121  	MULAWT	R(a), R(q), R(q), R(q) // q approx q-(q*q*d>>32)
   122  	TEQ 	R(M)->1, R(M) // check for d=0 or d=1
   123  
   124  	// 2nd Newton iteration
   125  	MUL.NE	R(M), R(q), R(a)
   126  	MOVW.NE	$0, R(s)
   127  	MULAL.NE R(q), R(a), (R(q),R(s))
   128  	BEQ 	udiv_by_0_or_1 // still testing the TEQ result; nothing in between writes the flags
   129  
   130  	// q now accurate enough for a remainder r, 0<=r<3*d
   131  	MULLU	R(q), R(r), (R(q),R(s)) // q = (r * q) >> 32	
   132  	ADD 	R(M), R(r), R(r) // r = n - d
   133  	MULA	R(M), R(q), R(r), R(r) // r = n - (q+1)*d
   134  
   135  	// since 0 <= n-q*d < 3*d; thus -d <= r < 2*d
   136  	CMN 	R(M), R(r) // t = r-d
   137  	SUB.CS	R(M), R(r), R(r) // if (t<-d || t>=0) r=r+d
   138  	ADD.CC	$1, R(q)
   139  	ADD.PL	R(M)<<1, R(r)
   140  	ADD.PL	$2, R(q)
   141  	RET
   142  
   143  udiv_by_large_d:
   144  	// at this point we know d>=2^(31-6)=2^25
   145  	SUB 	$4, R(a), R(a)
   146  	RSB 	$0, R(s), R(s) // s was negative on this path; make it a positive right-shift count
   147  	MOVW	R(a)>>R(s), R(q)
   148  	MULLU	R(q), R(r), (R(q),R(s))
   149  	MULA	R(M), R(q), R(r), R(r)
   150  
   151  	// q now accurate enough for a remainder r, 0<=r<4*d
   152  	CMN 	R(r)>>1, R(M) // if(r/2 >= d)
   153  	ADD.CS	R(M)<<1, R(r)
   154  	ADD.CS	$2, R(q)
   155  	CMN 	R(r), R(M)
   156  	ADD.CS	R(M), R(r)
   157  	ADD.CS	$1, R(q)
   158  	RET
   159  
   160  udiv_by_0_or_1:
   161  	// carry set if d==1, carry clear if d==0
   162  	BCC udiv_by_0
   163  	MOVW	R(r), R(q) // d == 1: quotient is n
   164  	MOVW	$0, R(r) // and the remainder is 0
   165  	RET
   166  
   167  udiv_by_0:
   168  	// The ARM toolchain expects it can emit references to DIV and MOD
   169  	// instructions. The linker rewrites each pseudo-instruction into
   170  	// a sequence that pushes two values onto the stack and then calls
   171  	// _divu, _modu, _div, or _mod (below), all of which have a 16-byte
   172  	// frame plus the saved LR. The traceback routine knows the expanded
   173  	// stack frame size at the pseudo-instruction call site, but it
   174  	// doesn't know that the frame has a non-standard layout. In particular,
   175  	// it expects to find a saved LR in the bottom word of the frame.
   176  	// Unwind the stack back to the pseudo-instruction call site, copy the
   177  	// saved LR where the traceback routine will look for it, and make it
   178  	// appear that panicdivide was called from that PC.
   179  	MOVW	0(R13), LR // presumably still the _divu/_modu/_div/_mod frame, since udiv<> is declared with $-4 -- TODO confirm
   180  	ADD	$20, R13 // 20 = 16-byte frame + 4-byte saved LR
   181  	MOVW	8(R13), R1 // actual saved LR
   182  	MOVW	R1, 0(R13) // expected here for traceback
   183  	B 	runtime·panicdivide(SB)
   184  
   185  TEXT fast_udiv_tab<>(SB),NOSPLIT,$-4
        	// Lookup table read by udiv<> for its initial reciprocal estimate.
        	// It is declared as a TEXT symbol but contains only data words.
        	// udiv<> addresses it as fast_udiv_tab<>-64(SB) plus
        	// (normalized divisor >> 25); with the divisor's top bit set that
        	// index is 64..127, which the -64 bias maps onto tab[0..63].
   186  	// var tab [64]byte
   187  	// tab[0] = 255; for i := 1; i <= 63; i++ { tab[i] = (1<<14)/(64+i) }
   188  	// laid out here as little-endian uint32s
        	// (so the low byte of each word is the lowest-numbered entry,
        	// e.g. 0xf4f8fcff holds tab[0]=0xff, tab[1]=0xfc, tab[2]=0xf8, tab[3]=0xf4)
   189  	WORD $0xf4f8fcff
   190  	WORD $0xe6eaedf0
   191  	WORD $0xdadde0e3
   192  	WORD $0xcfd2d4d7
   193  	WORD $0xc5c7cacc
   194  	WORD $0xbcbec0c3
   195  	WORD $0xb4b6b8ba
   196  	WORD $0xacaeb0b2
   197  	WORD $0xa5a7a8aa
   198  	WORD $0x9fa0a2a3
   199  	WORD $0x999a9c9d
   200  	WORD $0x93949697
   201  	WORD $0x8e8f9092
   202  	WORD $0x898a8c8d
   203  	WORD $0x85868788
   204  	WORD $0x81828384
   205  
   206  // Linker contract: the numerator arrives in R(TMP), and the
   207  // result has to be handed back in R(TMP) as well.
   208  TMP = 11
   209  
   210  TEXT _divu(SB), NOSPLIT, $16
        	// Unsigned quotient: R(TMP) = R(TMP) / 0(FP).
        	// R(q), R(r), R(s) and R(M) are spilled to the frame and reloaded,
        	// so only R(TMP) differs on return.
   211  	MOVW	R(M), 16(R13)		// spill the registers udiv<> works in
   212  	MOVW	R(s), 12(R13)
   213  	MOVW	R(r), 8(R13)
   214  	MOVW	R(q), 4(R13)
   215  
   216  	MOVW	0(FP), R(q)		// denominator
   217  	MOVW	R(TMP), R(r)		// numerator
   218  	BL	udiv<>(SB)
   219  	MOVW	R(q), R(TMP)		// hand back the quotient
   220  	MOVW	16(R13), R(M)		// reload the spilled registers
   221  	MOVW	12(R13), R(s)
   222  	MOVW	8(R13), R(r)
   223  	MOVW	4(R13), R(q)
   224  	RET
   225  
   226  TEXT _modu(SB), NOSPLIT, $16
        	// Unsigned remainder: R(TMP) = R(TMP) % 0(FP).
        	// R(q), R(r), R(s) and R(M) are spilled to the frame and reloaded,
        	// so only R(TMP) differs on return.
   227  	MOVW	R(M), 16(R13)		// spill the registers udiv<> works in
   228  	MOVW	R(s), 12(R13)
   229  	MOVW	R(r), 8(R13)
   230  	MOVW	R(q), 4(R13)
   231  
   232  	MOVW	0(FP), R(q)		// denominator
   233  	MOVW	R(TMP), R(r)		// numerator
   234  	BL	udiv<>(SB)
   235  	MOVW	R(r), R(TMP)		// hand back the remainder
   236  	MOVW	16(R13), R(M)		// reload the spilled registers
   237  	MOVW	12(R13), R(s)
   238  	MOVW	8(R13), R(r)
   239  	MOVW	4(R13), R(q)
   240  	RET
   241  
   242  TEXT _div(SB),NOSPLIT,$16
        	// Signed quotient: numerator in R(TMP), denominator at 0(FP),
        	// result returned in R(TMP).
        	// Negative operands are negated, the magnitudes go through udiv<>,
        	// and the quotient is negated when exactly one operand was negative.
        	// Every exit branches to the shared "out" epilogue located inside
        	// _mod below, which relies on both functions using the same
        	// 16-byte frame and the same save slots.
   243  	MOVW	R(q), 4(R13)
   244  	MOVW	R(r), 8(R13)
   245  	MOVW	R(s), 12(R13)
   246  	MOVW	R(M), 16(R13)
   247  	MOVW	R(TMP), R(r)		/* numerator */
   248  	MOVW	0(FP), R(q) 		/* denominator */
   249  	CMP 	$0, R(r)
   250  	BGE 	d1 // numerator >= 0
   251  	RSB 	$0, R(r), R(r) // numerator < 0: negate it
   252  	CMP 	$0, R(q)
   253  	BGE 	d2 // only the numerator was negative
   254  	RSB 	$0, R(q), R(q) // both negative: negate the denominator too
   255  d0:
   256  	BL  	udiv<>(SB)  		/* none/both neg */
   257  	MOVW	R(q), R(TMP)
   258  	B		out
   259  d1:
   260  	CMP 	$0, R(q)
   261  	BGE 	d0 // neither operand negative
   262  	RSB 	$0, R(q), R(q) // only the denominator was negative: negate it
   263  d2:
   264  	BL  	udiv<>(SB)  		/* one neg */
   265  	RSB		$0, R(q), R(TMP) // result = -quotient
   266  	B   	out
   267  
   268  TEXT _mod(SB),NOSPLIT,$16
        	// Signed remainder: numerator in R(TMP), denominator at 0(FP),
        	// result returned in R(TMP).
        	// The denominator's sign is discarded; the remainder is negated
        	// when the numerator was negative.
   269  	MOVW	R(q), 4(R13)
   270  	MOVW	R(r), 8(R13)
   271  	MOVW	R(s), 12(R13)
   272  	MOVW	R(M), 16(R13)
   273  	MOVW	R(TMP), R(r)		/* numerator */
   274  	MOVW	0(FP), R(q) 		/* denominator */
   275  	CMP 	$0, R(q)
   276  	RSB.LT	$0, R(q), R(q) // negate a negative denominator
   277  	CMP 	$0, R(r)
   278  	BGE 	m1
   279  	RSB 	$0, R(r), R(r) // numerator < 0: negate it
   280  	BL  	udiv<>(SB)  		/* neg numerator */
   281  	RSB 	$0, R(r), R(TMP) // result = -remainder
   282  	B   	out
   283  m1:
   284  	BL  	udiv<>(SB)  		/* pos numerator */
   285  	MOVW	R(r), R(TMP)
   286  out:
        	// Shared epilogue for _div and _mod: reload the four saved registers.
   287  	MOVW	4(R13), R(q)
   288  	MOVW	8(R13), R(r)
   289  	MOVW	12(R13), R(s)
   290  	MOVW	16(R13), R(M)
   291  	RET