github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/runtime/vlop_arm.s

github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/runtime/vlop_arm.s (about)

     1  // Inferno's libkern/vlop-arm.s
     2  // http://code.google.com/p/inferno-os/source/browse/libkern/vlop-arm.s
     3  //
     4  //         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6  //         Portions Copyright 2009 The Go Authors. All rights reserved.
     7  //
     8  // Permission is hereby granted, free of charge, to any person obtaining a copy
     9  // of this software and associated documentation files (the "Software"), to deal
    10  // in the Software without restriction, including without limitation the rights
    11  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12  // copies of the Software, and to permit persons to whom the Software is
    13  // furnished to do so, subject to the following conditions:
    14  //
    15  // The above copyright notice and this permission notice shall be included in
    16  // all copies or substantial portions of the Software.
    17  //
    18  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24  // THE SOFTWARE.
    25  
    26  #include "zasm_GOOS_GOARCH.h"
    27  #include "../../cmd/ld/textflag.h"
    28  
    29  arg=0 // R(arg) names R0, which _mulv uses as the address to store its result through
    30  
    31  /* replaced use of R10 by R11 because the former can be the data segment base register */
    32  // _mulv: multiply the 64-bit values (h0:l0) and (h1:l1); the low 64 bits of the product are stored at the address passed in 0(FP).
    33  TEXT _mulv(SB), NOSPLIT, $0
    34  	MOVW	0(FP), R0	/* address that receives the result (R(arg) below) */
    35  	MOVW	4(FP), R2	/* l0 */
    36  	MOVW	8(FP), R11	/* h0 */
    37  	MOVW	12(FP), R4	/* l1 */
    38  	MOVW	16(FP), R5	/* h1 */
    39  	MULLU	R4, R2, (R7,R6)	/* R7:R6 = l1*l0, full unsigned 64-bit product */
    40  	MUL	R11, R4, R8	/* R8 = h0*l1 (low 32 bits) */
    41  	ADD	R8, R7	/* fold the cross product into the high word */
    42  	MUL	R2, R5, R8	/* R8 = l0*h1 (low 32 bits) */
    43  	ADD	R8, R7	/* fold the second cross product into the high word */
    44  	MOVW	R6, 0(R(arg))	/* store low word of the result */
    45  	MOVW	R7, 4(R(arg))	/* store high word of the result */
    46  	RET
    47  
    48  // trampoline for _sfloat2. passes LR as arg0 and
    49  // saves registers R0-R13 and CPSR on the stack. R0-R12 and CPSR flags can
    50  // be changed by _sfloat2.
    51  TEXT _sfloat(SB), NOSPLIT, $64-0 // 4 arg + 14*4 saved regs + cpsr
    52  	MOVW	R14, 4(R13) // arg0 for _sfloat2: the LR this trampoline was entered with
    53  	MOVW	R0, 8(R13) // save R0 first; it is reused below as the store pointer
    54  	MOVW	$12(R13), R0 // R0 = address of the slot for R1
    55  	MOVM.IA.W	[R1-R12], (R0) // save R1-R12 in consecutive words, 12(R13) through 56(R13)
    56  	MOVW	$68(R13), R1 // correct for frame size
    57  	MOVW	R1, 60(R13) // slot for R13: the adjusted value computed above
    58  	WORD	$0xe10f1000 // mrs r1, cpsr
    59  	MOVW	R1, 64(R13) // save CPSR in the last word of the frame
    60  	// Disable preemption of this goroutine during _sfloat2 by
    61  	// m->locks++ and m->locks-- around the call.
    62  	// Rescheduling this goroutine may cause the loss of the
    63  	// contents of the software floating point registers in
    64  	// m->freghi, m->freglo, m->fflag, if the goroutine is moved
    65  	// to a different m or another goroutine runs on this m.
    66  	// Rescheduling at ordinary function calls is okay because
    67  	// all registers are caller save, but _sfloat2 and the things
    68  	// that it runs are simulating the execution of individual
    69  	// program instructions, and those instructions do not expect
    70  	// the floating point registers to be lost.
    71  	// An alternative would be to move the software floating point
    72  	// registers into G, but they do not need to be kept at the
    73  	// usual places a goroutine reschedules (at function calls),
    74  	// so it would be a waste of 132 bytes per G.
    75  	MOVW	m_locks(m), R1
    76  	ADD	$1, R1
    77  	MOVW	R1, m_locks(m) // m->locks++
    78  	BL	runtime·_sfloat2(SB)
    79  	MOVW	m_locks(m), R1
    80  	SUB	$1, R1
    81  	MOVW	R1, m_locks(m) // m->locks--
    82  	MOVW	R0, 0(R13) // NOTE(review): writes _sfloat2's R0 result to 0(R13), presumably the return-address slot consumed by RET - confirm
    83  	MOVW	64(R13), R1 // reload the saved (possibly updated) CPSR
    84  	WORD	$0xe128f001	// msr cpsr_f, r1
    85  	MOVW	$12(R13), R0 // R0 = address of the saved R1
    86  	// Restore R1-R8 and R11-R12, but ignore the saved R9 (m) and R10 (g).
    87  	// Both are maintained by the runtime and always have correct values,
    88  	// so there is no need to restore old values here.
    89  	// The g should not have changed, but m may have, if we were preempted
    90  	// and restarted on a different thread, in which case restoring the old
    91  	// value is incorrect and will cause serious confusion in the runtime.
    92  	MOVM.IA.W	(R0), [R1-R8]
    93  	MOVW	$52(R13), R0 // skip the R9/R10 slots: 52(R13) holds the saved R11
    94  	MOVM.IA.W	(R0), [R11-R12]
    95  	MOVW	8(R13), R0 // restore R0 last, since it served as the load pointer
    96  	RET
    97  
    98  // func udiv(n, d uint32) (q, r uint32)
    99  // Reference:
   100  // Sloss, Andrew et al.; ARM System Developer's Guide: Designing and Optimizing System Software
   101  // Morgan Kaufmann; 1 edition (April 8, 2004), ISBN 978-1558608740
   102  q = 0 // input d, output q
   103  r = 1 // input n, output r
   104  s = 2 // three temporary variables
   105  M = 3
   106  a = 11
   107  // Please be careful when changing this, it is pretty fragile:
   108  // 1. don't use unconditional branch as the linker is free to reorder the blocks;
   109  // 2. if a == 11, beware that the linker will use R11 if you use certain instructions.
   110  TEXT udiv<>(SB),NOSPLIT,$-4 // in: R(q)=d, R(r)=n; out: R(q)=quotient, R(r)=remainder; R(s), R(M), R(a) are overwritten
   111  	CLZ 	R(q), R(s) // find normalizing shift
   112  	MOVW.S	R(q)<<R(s), R(a) // a = d<<s; the .S sets the flags tested by the .NE load below
   113  	ADD 	R(a)>>25, PC, R(a) // most significant 7 bits of divisor
   114  	MOVBU.NE	(4*36-64)(R(a)), R(a) // 36 == number of inst. between fast_udiv_tab and begin; load happens only when d<<s was nonzero
   115  
   116  begin:
   117  	SUB.S	$7, R(s) // sets N for the .PL instructions and the BMI below (nothing in between updates the flags)
   118  	RSB 	$0, R(q), R(M) // M = -q
   119  	MOVW.PL	R(a)<<R(s), R(q) // q = (table byte)<<s
   120  
   121  	// 1st Newton iteration
   122  	MUL.PL	R(M), R(q), R(a) // a = -q*d
   123  	BMI 	udiv_by_large_d // s-7 was negative
   124  	MULAWT	R(a), R(q), R(q), R(q) // q approx q-(q*q*d>>32)
   125  	TEQ 	R(M)->1, R(M) // check for d=0 or d=1
   126  
   127  	// 2nd Newton iteration
   128  	MUL.NE	R(M), R(q), R(a)
   129  	MOVW.NE	$0, R(s)
   130  	MULAL.NE R(q), R(a), (R(q),R(s))
   131  	BEQ 	udiv_by_0_or_1 // Z still comes from the TEQ above
   132  
   133  	// q now accurate enough for a remainder r, 0<=r<3*d
   134  	MULLU	R(q), R(r), (R(q),R(s)) // q = (r * q) >> 32
   135  	ADD 	R(M), R(r), R(r) // r = n - d
   136  	MULA	R(M), R(q), R(r), R(r) // r = n - (q+1)*d
   137  
   138  	// since 0 <= n-q*d < 3*d; thus -d <= r < 2*d
   139  	CMN 	R(M), R(r) // t = r-d
   140  	SUB.CS	R(M), R(r), R(r) // if (t<-d || t>=0) r=r+d
   141  	ADD.CC	$1, R(q)
   142  	ADD.PL	R(M)<<1, R(r)
   143  	ADD.PL	$2, R(q)
   144  
   145  	// return, can't use RET here or fast_udiv_tab will be dropped during linking
   146  	MOVW	R14, R15
   147  
   148  udiv_by_large_d:
   149  	// at this point we know d>=2^(31-6)=2^25
   150  	SUB 	$4, R(a), R(a)
   151  	RSB 	$0, R(s), R(s)
   152  	MOVW	R(a)>>R(s), R(q)
   153  	MULLU	R(q), R(r), (R(q),R(s))
   154  	MULA	R(M), R(q), R(r), R(r)
   155  
   156  	// q now accurate enough for a remainder r, 0<=r<4*d
   157  	CMN 	R(r)>>1, R(M) // if(r/2 >= d)
   158  	ADD.CS	R(M)<<1, R(r)
   159  	ADD.CS	$2, R(q)
   160  	CMN 	R(r), R(M)
   161  	ADD.CS	R(M), R(r)
   162  	ADD.CS	$1, R(q)
   163  
   164  	// return, can't use RET here or fast_udiv_tab will be dropped during linking
   165  	MOVW	R14, R15
   166  
   167  udiv_by_0_or_1:
   168  	// carry set if d==1, carry clear if d==0
   169  	MOVW.CS	R(r), R(q) // d==1: quotient is n
   170  	MOVW.CS	$0, R(r) // d==1: remainder is 0
   171  	BL.CC 	runtime·panicdivide(SB) // d==0: no way back
   172  
   173  	// return, can't use RET here or fast_udiv_tab will be dropped during linking
   174  	MOVW	R14, R15
   175  
   176  fast_udiv_tab:
   177  	// var tab [64]byte
   178  	// tab[0] = 255; for i := 1; i <= 63; i++ { tab[i] = (1<<14)/(64+i) }
   179  	// laid out here as little-endian uint32s
   180  	WORD $0xf4f8fcff
   181  	WORD $0xe6eaedf0
   182  	WORD $0xdadde0e3
   183  	WORD $0xcfd2d4d7
   184  	WORD $0xc5c7cacc
   185  	WORD $0xbcbec0c3
   186  	WORD $0xb4b6b8ba
   187  	WORD $0xacaeb0b2
   188  	WORD $0xa5a7a8aa
   189  	WORD $0x9fa0a2a3
   190  	WORD $0x999a9c9d
   191  	WORD $0x93949697
   192  	WORD $0x8e8f9092
   193  	WORD $0x898a8c8d
   194  	WORD $0x85868788
   195  	WORD $0x81828384
   196  
   197  // The linker will pass numerator in R(TMP), and it also
   198  // expects the result in R(TMP)
   199  TMP = 11
   200  // _divu: unsigned quotient. Numerator arrives in R(TMP), denominator at 0(FP); the quotient is left in R(TMP).
   201  TEXT _divu(SB), NOSPLIT, $16 // the 16-byte frame holds the four registers saved below
   202  	MOVW	R(q), 4(R13) // preserve the registers udiv<> overwrites
   203  	MOVW	R(r), 8(R13)
   204  	MOVW	R(s), 12(R13)
   205  	MOVW	R(M), 16(R13)
   206  // udiv<>'s remaining temporary R(a) is R11, the same register as R(TMP), so it is not saved.
   207  	MOVW	R(TMP), R(r)		/* numerator */
   208  	MOVW	0(FP), R(q) 		/* denominator */
   209  	BL  	udiv<>(SB)
   210  	MOVW	R(q), R(TMP)		/* result: quotient */
   211  	MOVW	4(R13), R(q)		/* restore the saved registers */
   212  	MOVW	8(R13), R(r)
   213  	MOVW	12(R13), R(s)
   214  	MOVW	16(R13), R(M)
   215  	RET
   216  
   217  TEXT _modu(SB), NOSPLIT, $16 // unsigned remainder: numerator in R(TMP), denominator at 0(FP), result left in R(TMP)
   218  	MOVW	R(q), 4(R13) // preserve the registers udiv<> overwrites
   219  	MOVW	R(r), 8(R13)
   220  	MOVW	R(s), 12(R13)
   221  	MOVW	R(M), 16(R13)
   222  // R(TMP) must be copied out before the call: udiv<> uses R(a), which is the same register (R11).
   223  	MOVW	R(TMP), R(r)		/* numerator */
   224  	MOVW	0(FP), R(q) 		/* denominator */
   225  	BL  	udiv<>(SB)
   226  	MOVW	R(r), R(TMP)		/* result: remainder */
   227  	MOVW	4(R13), R(q)		/* restore the saved registers */
   228  	MOVW	8(R13), R(r)
   229  	MOVW	12(R13), R(s)
   230  	MOVW	16(R13), R(M)
   231  	RET
   232  
   233  TEXT _div(SB),NOSPLIT,$16 // signed quotient: numerator in R(TMP), denominator at 0(FP), result left in R(TMP)
   234  	MOVW	R(q), 4(R13) // preserve the registers udiv<> overwrites
   235  	MOVW	R(r), 8(R13)
   236  	MOVW	R(s), 12(R13)
   237  	MOVW	R(M), 16(R13)
   238  	MOVW	R(TMP), R(r)		/* numerator */
   239  	MOVW	0(FP), R(q) 		/* denominator */
   240  	CMP 	$0, R(r)
   241  	BGE 	d1			/* numerator >= 0 */
   242  	RSB 	$0, R(r), R(r)		/* numerator < 0: negate it */
   243  	CMP 	$0, R(q)
   244  	BGE 	d2			/* denominator >= 0: exactly one operand was negative */
   245  	RSB 	$0, R(q), R(q)		/* both negative: negate the denominator too */
   246  d0:
   247  	BL  	udiv<>(SB)  		/* none/both neg */
   248  	MOVW	R(q), R(TMP)		/* quotient is returned unchanged */
   249  	B		out		/* out: is the register-restoring epilogue at the end of _mod */
   250  d1:
   251  	CMP 	$0, R(q)
   252  	BGE 	d0			/* both operands non-negative */
   253  	RSB 	$0, R(q), R(q)		/* only the denominator is negative: negate it */
   254  d2:
   255  	BL  	udiv<>(SB)  		/* one neg */
   256  	RSB		$0, R(q), R(TMP)	/* result is the negated quotient */
   257  	B   	out			/* restore registers and return via _mod's epilogue */
   258  
   259  TEXT _mod(SB),NOSPLIT,$16 // signed remainder: numerator in R(TMP), denominator at 0(FP), result left in R(TMP)
   260  	MOVW	R(q), 4(R13) // preserve the registers udiv<> overwrites
   261  	MOVW	R(r), 8(R13)
   262  	MOVW	R(s), 12(R13)
   263  	MOVW	R(M), 16(R13)
   264  	MOVW	R(TMP), R(r)		/* numerator */
   265  	MOVW	0(FP), R(q) 		/* denominator */
   266  	CMP 	$0, R(q)
   267  	RSB.LT	$0, R(q), R(q)		/* negate a negative denominator; its sign is not used again */
   268  	CMP 	$0, R(r)
   269  	BGE 	m1			/* numerator >= 0 */
   270  	RSB 	$0, R(r), R(r)		/* numerator < 0: negate it before dividing */
   271  	BL  	udiv<>(SB)  		/* neg numerator */
   272  	RSB 	$0, R(r), R(TMP)	/* result is the negated remainder */
   273  	B   	out
   274  m1:
   275  	BL  	udiv<>(SB)  		/* pos numerator */
   276  	MOVW	R(r), R(TMP)		/* remainder is returned unchanged */
   277  out:	/* common epilogue; _div branches here as well */
   278  	MOVW	4(R13), R(q)		/* restore the saved registers */
   279  	MOVW	8(R13), R(r)
   280  	MOVW	12(R13), R(s)
   281  	MOVW	16(R13), R(M)
   282  	RET