github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/vlop_arm.s (about)

     1  // Inferno's libkern/vlop-arm.s
     2  // http://code.google.com/p/inferno-os/source/browse/libkern/vlop-arm.s
     3  //
     4  //         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6  //         Portions Copyright 2009 The Go Authors. All rights reserved.
     7  //
     8  // Permission is hereby granted, free of charge, to any person obtaining a copy
     9  // of this software and associated documentation files (the "Software"), to deal
    10  // in the Software without restriction, including without limitation the rights
    11  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12  // copies of the Software, and to permit persons to whom the Software is
    13  // furnished to do so, subject to the following conditions:
    14  //
    15  // The above copyright notice and this permission notice shall be included in
    16  // all copies or substantial portions of the Software.
    17  //
    18  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24  // THE SOFTWARE.
    25  
    26  arg=0
    27  
    28  /* h0 is kept in R11 instead of R10, because R10 can be the data segment base register */
    29  
    30  TEXT _mulv(SB), $0
        // Multiplies the two-word value (h0:l0) by (h1:l1) and keeps the low
        // 64 bits.  The low result word is stored at 0(R(arg)) and the high
        // word at 4(R(arg)), where R(arg) (R0) is the pointer read from 0(FP).
        // Overwrites R0, R2, R4-R8 and R11.
    31  	MOVW	16(FP), R5	/* h1: second operand, high word */
    32  	MOVW	12(FP), R4	/* l1: second operand, low word */
    33  	MOVW	8(FP), R11	/* h0: first operand, high word */
    34  	MOVW	4(FP), R2	/* l0: first operand, low word */
    35  	MOVW	0(FP), R(arg)	/* where the product is written */
    36  	MULLU	R4, R2, (R7,R6)	/* R7:R6 = l1 * l0, full 64 bits */
    37  	MUL	R11, R4, R8	/* cross term h0 * l1 (low 32 bits) */
    38  	ADD	R8, R7		/* ...only affects the high word */
    39  	MUL	R2, R5, R8	/* cross term l0 * h1 (low 32 bits) */
    40  	ADD	R8, R7
    41  	MOVW	R6, 0(R(arg))	/* low word of the product */
    42  	MOVW	R7, 4(R(arg))	/* high word of the product */
    43  	RET
    44  
    45  // Trampoline for _sfloat2.  Passes the incoming LR as arg0 and
    46  // saves registers R0-R13 and CPSR on the stack.  R0-R12 and the CPSR flags
    47  // can be changed by _sfloat2 (they are reloaded from the save area below).
        //
        // Stack slots used, relative to R13 inside this function:
        //    4        arg0 for _sfloat2 (incoming R14)
        //    8        R0
        //   12..56    R1-R12
        //   60        R13+68 (see "correct for frame size" below)
        //   64        CPSR
        // The slot at 60(R13) is written but not read back by this routine.
    48  TEXT _sfloat(SB), 7, $64 // 4 (arg) + 14*4 (saved R0-R13) + 4 (cpsr) = 64
    49  	MOVW	R14, 4(R13)	// arg0 = LR
    50  	MOVW	R0, 8(R13)	// save R0 first so it can serve as the store pointer
    51  	MOVW	$12(R13), R0	// R0 = address of the R1 slot
    52  	MOVM.IA.W	[R1-R12], (R0)	// store R1-R12 at 12(R13)..56(R13)
    53  	MOVW	$68(R13), R1 // correct for frame size (presumably the $64 frame plus the saved-LR word)
    54  	MOVW	R1, 60(R13)	// saved R13 slot
    55  	WORD	$0xe10f1000 // mrs r1, cpsr
    56  	MOVW	R1, 64(R13)	// saved CPSR slot
    57  	BL	runtime·_sfloat2(SB)
    58  	MOVW	R0, 0(R13)	// _sfloat2's result goes to 0(R13); presumably the slot RET reloads the return address from - confirm
    59  	MOVW	64(R13), R1	// CPSR value from the save area
    60  	WORD	$0xe128f001	// msr cpsr_f, r1
    61  	MOVW	$12(R13), R0	// R0 = address of the R1 slot again
    62  	MOVM.IA.W	(R0), [R1-R12]	// reload R1-R12 from the save area
    63  	MOVW	8(R13), R0	// reload R0 last, since it was the load pointer
    64  	RET
    65  
    66  // func udiv(n, d uint32) (q, r uint32)
        //
        // Register-based interface: on entry R(q) holds the divisor d and R(r)
        // holds the dividend n; on return R(q) holds the quotient and R(r) the
        // remainder.  R(s), R(m), R(a) and the condition flags are overwritten.
        // A zero divisor ends in a call to runtime·panicdivide.
        //
    67  // Reference:
    68  // Sloss, Andrew et al.; ARM System Developer's Guide: Designing and Optimizing System Software
    69  // Morgan Kaufmann; 1 edition (April 8, 2004), ISBN 978-1558608740
    70  q = 0 // input d, output q
    71  r = 1 // input n, output r
    72  s = 2 // three temporary variables: s, m and a
    73  m = 3 // holds -d from begin onwards
    74  a = 11
    75  // Please be careful when changing this, it is pretty fragile:
    76  // 1. don't use unconditional branch as the linker is free to reorder the blocks;
    77  // 2. if a == 11, beware that the linker will use R11 if you use certain instructions.
    78  TEXT udiv<>(SB),7,$-4
    79  	CLZ 	R(q), R(s) // find normalizing shift
    80  	MOVW.S	R(q)<<R(s), R(a) // a = d shifted so its top bit is set; flags record whether the result is zero
    81  	ADD 	R(a)>>25, PC, R(a) // most significant 7 bits of divisor
    82  	MOVBU.NE	(4*36-64)(R(a)), R(a) // 36 == number of inst. between fast_udiv_tab and begin
        	// The 7-bit index has its top bit set whenever d != 0, i.e. it lies in
        	// 64..127, hence the -64 to address tab[0..63]; 4*36 skips the 36
        	// instructions from begin to fast_udiv_tab.  The load is skipped (NE
        	// false) when the normalized divisor is zero.
        	// NOTE(review): this relies on PC in the ADD above reading as the
        	// address of begin - confirm before moving any instruction.
    83  
    84  begin:
    85  	SUB.S	$7, R(s) // flags from s-7 select the PL (small d) / MI (large d) paths below
    86  	RSB 	$0, R(q), R(m) // m = -q (R(q) still holds d here, so m = -d)
    87  	MOVW.PL	R(a)<<R(s), R(q) // q = table estimate scaled by the remaining shift
    88  
    89  	// 1st Newton iteration
    90  	MUL.PL	R(m), R(q), R(a) // a = -q*d
    91  	BMI 	udiv_by_large_d
    92  	MULAWT	R(a), R(q), R(q), R(q) // q approx q-(q*q*d>>32)
    93  	TEQ 	R(m)->1, R(m) // check for d=0 or d=1
    94  
    95  	// 2nd Newton iteration
    96  	MUL.NE	R(m), R(q), R(a)
    97  	MOVW.NE	$0, R(s)
    98  	MULAL.NE R(q), R(a), (R(q),R(s))
    99  	BEQ 	udiv_by_0_or_1
   100  
   101  	// q now accurate enough for a remainder r, 0<=r<3*d
   102  	MULLU	R(q), R(r), (R(q),R(s)) // q = (r * q) >> 32
   103  	ADD 	R(m), R(r), R(r) // r = n - d
   104  	MULA	R(m), R(q), R(r), R(r) // r = n - (q+1)*d
   105  
   106  	// since 0 <= n-q*d < 3*d; thus -d <= r < 2*d
   107  	CMN 	R(m), R(r) // t = r-d
   108  	SUB.CS	R(m), R(r), R(r) // if (t<-d || t>=0) r=r+d
   109  	ADD.CC	$1, R(q)
   110  	ADD.PL	R(m)<<1, R(r)
   111  	ADD.PL	$2, R(q)
   112  
   113  	// return, can't use RET here or fast_udiv_tab will be dropped during linking
   114  	MOVW	R14, R15
   115  
   116  udiv_by_large_d:
   117  	// at this point we know d>=2^(31-6)=2^25
   118  	SUB 	$4, R(a), R(a)
   119  	RSB 	$0, R(s), R(s) // s was negative on this path (MI); make it a right-shift count
   120  	MOVW	R(a)>>R(s), R(q)
   121  	MULLU	R(q), R(r), (R(q),R(s))
   122  	MULA	R(m), R(q), R(r), R(r)
   123  
   124  	// q now accurate enough for a remainder r, 0<=r<4*d
   125  	CMN 	R(r)>>1, R(m) // if(r/2 >= d)
   126  	ADD.CS	R(m)<<1, R(r) // r -= 2*d
   127  	ADD.CS	$2, R(q)
   128  	CMN 	R(r), R(m) // if(r >= d)
   129  	ADD.CS	R(m), R(r) // r -= d
   130  	ADD.CS	$1, R(q)
   131  
   132  	// return, can't use RET here or fast_udiv_tab will be dropped during linking
   133  	MOVW	R14, R15
   134  
   135  udiv_by_0_or_1:
   136  	// carry set if d==1, carry clear if d==0
   137  	MOVW.CS	R(r), R(q) // d == 1: quotient is n
   138  	MOVW.CS	$0, R(r) // d == 1: remainder is 0
   139  	BL.CC 	runtime·panicdivide(SB) // no way back
   140  
   141  	// return, can't use RET here or fast_udiv_tab will be dropped during linking
   142  	MOVW	R14, R15
   143  
   144  fast_udiv_tab:
   145  	// var tab [64]byte
   146  	// tab[0] = 255; for i := 1; i <= 63; i++ { tab[i] = (1<<14)/(64+i) }
   147  	// laid out here as little-endian uint32s
        	// (so the low byte of each WORD is the lowest-indexed entry, e.g.
        	// 0xf4f8fcff holds tab[0]=0xff, tab[1]=0xfc, tab[2]=0xf8, tab[3]=0xf4)
   148  	WORD $0xf4f8fcff
   149  	WORD $0xe6eaedf0
   150  	WORD $0xdadde0e3
   151  	WORD $0xcfd2d4d7
   152  	WORD $0xc5c7cacc
   153  	WORD $0xbcbec0c3
   154  	WORD $0xb4b6b8ba
   155  	WORD $0xacaeb0b2
   156  	WORD $0xa5a7a8aa
   157  	WORD $0x9fa0a2a3
   158  	WORD $0x999a9c9d
   159  	WORD $0x93949697
   160  	WORD $0x8e8f9092
   161  	WORD $0x898a8c8d
   162  	WORD $0x85868788
   163  	WORD $0x81828384
   164  
   165  // The linker passes the numerator in R(TMP), and it also
   166  // expects the result back in R(TMP).
   167  TMP = 11 // i.e. R11, the same register number as the alias a used by udiv
   168  
   169  TEXT _divu(SB), 7, $16
        	// Unsigned 32-bit division helper.  The numerator arrives in R(TMP),
        	// the denominator is read from 0(FP), and the quotient is handed back
        	// in R(TMP).  R(q), R(r), R(s) and R(m) are spilled to this frame and
        	// reloaded, so they hold their incoming values again at RET.
   170  	MOVW	R(m), 16(R13)		/* spill the four registers udiv works in */
   171  	MOVW	R(s), 12(R13)
   172  	MOVW	R(r), 8(R13)
   173  	MOVW	R(q), 4(R13)
   174  
   175  	MOVW	R(TMP), R(r)		/* n: udiv takes the numerator in R(r) */
   176  	MOVW	0(FP), R(q) 		/* d: udiv takes the denominator in R(q) */
   177  	BL  	udiv<>(SB)
   178  	MOVW	R(q), R(TMP)		/* result = quotient */
   179  	MOVW	16(R13), R(m)		/* reload the spilled registers */
   180  	MOVW	12(R13), R(s)
   181  	MOVW	8(R13), R(r)
   182  	MOVW	4(R13), R(q)
   183  	RET
   184  
   185  TEXT _modu(SB), 7, $16
        	// Unsigned 32-bit remainder helper.  The numerator arrives in R(TMP),
        	// the denominator is read from 0(FP), and the remainder is handed back
        	// in R(TMP).  R(q), R(r), R(s) and R(m) are spilled to this frame and
        	// reloaded, so they hold their incoming values again at RET.
   186  	MOVW	R(m), 16(R13)		/* spill the four registers udiv works in */
   187  	MOVW	R(s), 12(R13)
   188  	MOVW	R(r), 8(R13)
   189  	MOVW	R(q), 4(R13)
   190  
   191  	MOVW	R(TMP), R(r)		/* n: udiv takes the numerator in R(r) */
   192  	MOVW	0(FP), R(q) 		/* d: udiv takes the denominator in R(q) */
   193  	BL  	udiv<>(SB)
   194  	MOVW	R(r), R(TMP)		/* result = remainder */
   195  	MOVW	16(R13), R(m)		/* reload the spilled registers */
   196  	MOVW	12(R13), R(s)
   197  	MOVW	8(R13), R(r)
   198  	MOVW	4(R13), R(q)
   199  	RET
   200  
   201  TEXT _div(SB),7,$16
        	// Signed 32-bit division helper.  The numerator arrives in R(TMP), the
        	// denominator is read from 0(FP), and the quotient is left in R(TMP).
        	// Both operands are made non-negative, udiv does the work, and the
        	// quotient is negated when exactly one operand was negative.
        	// Every path leaves through the label out, which is defined inside
        	// _mod and reloads R(q), R(r), R(s), R(m) from this frame's slots.
   202  	MOVW	R(q), 4(R13)		/* spill the registers udiv works in */
   203  	MOVW	R(r), 8(R13)
   204  	MOVW	R(s), 12(R13)
   205  	MOVW	R(m), 16(R13)
   206  	MOVW	R(TMP), R(r)		/* numerator */
   207  	MOVW	0(FP), R(q) 		/* denominator */
   208  	CMP 	$0, R(r)
   209  	BGE 	d1			/* numerator >= 0 */
   210  	RSB 	$0, R(r), R(r)		/* numerator < 0: negate it */
   211  	CMP 	$0, R(q)
   212  	BGE 	d2			/* only the numerator was negative */
   213  	RSB 	$0, R(q), R(q)		/* both negative: negate denominator too */
   214  d0:
   215  	BL  	udiv<>(SB)  		/* none/both neg */
   216  	MOVW	R(q), R(TMP)		/* quotient is returned as is */
   217  	B		out
   218  d1:
   219  	CMP 	$0, R(q)
   220  	BGE 	d0			/* neither operand negative */
   221  	RSB 	$0, R(q), R(q)		/* only the denominator was negative */
   222  d2:
   223  	BL  	udiv<>(SB)  		/* one neg */
   224  	RSB		$0, R(q), R(TMP)	/* quotient is returned negated */
   225  	B   	out
   226  
   227  TEXT _mod(SB),7,$16
        	// Signed 32-bit remainder helper.  The numerator arrives in R(TMP), the
        	// denominator is read from 0(FP), and the remainder is left in R(TMP).
        	// The denominator's sign is discarded; the remainder is negated when
        	// the numerator was negative.
   228  	MOVW	R(q), 4(R13)		/* spill the registers udiv works in */
   229  	MOVW	R(r), 8(R13)
   230  	MOVW	R(s), 12(R13)
   231  	MOVW	R(m), 16(R13)
   232  	MOVW	R(TMP), R(r)		/* numerator */
   233  	MOVW	0(FP), R(q) 		/* denominator */
   234  	CMP 	$0, R(q)
   235  	RSB.LT	$0, R(q), R(q)		/* negate a negative denominator */
   236  	CMP 	$0, R(r)
   237  	BGE 	m1
   238  	RSB 	$0, R(r), R(r)		/* negate the negative numerator */
   239  	BL  	udiv<>(SB)  		/* neg numerator */
   240  	RSB 	$0, R(r), R(TMP)	/* remainder is returned negated */
   241  	B   	out
   242  m1:
   243  	BL  	udiv<>(SB)  		/* pos numerator */
   244  	MOVW	R(r), R(TMP)		/* remainder is returned as is */
        // Common epilogue: _div also branches here, so the slot layout below
        // must stay in step with the spills at the top of both routines.
   245  out:
   246  	MOVW	4(R13), R(q)
   247  	MOVW	8(R13), R(r)
   248  	MOVW	12(R13), R(s)
   249  	MOVW	16(R13), R(m)
   250  	RET