github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/math/big/arith_arm.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file provides fast assembly versions for the elementary
     6  // arithmetic operations on vectors implemented in arith.go.
     7  
     8  #define CFLAG 29	// bit position of the carry (C) flag within the CPSR
     9  
    10  // func addVV(z, x, y []Word) (c Word)
    11  // addVV sets z[i] = x[i] + y[i] + carry for every i < len(z) and returns the final carry (0 or 1) in c.
    12  // The carry stays in the C flag for the whole loop; the loop test uses TEQ because it leaves C untouched.
    13  TEXT ·addVV(SB),7,$0
    14  	ADD.S	$0, R0	// clear the carry flag (adding 0 never carries)
    15  	MOVW	z+0(FP), R1	// R1 = write cursor into z
    16  	MOVW	x+12(FP), R2	// R2 = read cursor into x
    17  	MOVW	y+24(FP), R3	// R3 = read cursor into y
    18  	MOVW	z_len+4(FP), R4
    19  	MOVW	R4<<2, R4	// len(z) in bytes
    20  	ADD	R1, R4	// R4 = address just past the last word of z
    21  	B E1
    22  L1:
    23  	MOVW.P	4(R2), R5	// R5 = next x word, post-increment
    24  	MOVW.P	4(R3), R6	// R6 = next y word, post-increment
    25  	ADC.S	R6, R5	// R5 += R6 + C; C = carry out
    26  	MOVW.P	R5, 4(R1)	// store the sum word, post-increment
    27  E1:
    28  	TEQ	R1, R4	// Z set once the cursor reaches the end; C is preserved
    29  	BNE L1
    30  
    31  	MOVW	$0, R0
    32  	MOVW.CS	$1, R0	// c = 1 iff the last addition carried out
    33  	MOVW	R0, c+36(FP)
    34  	RET
    35  
    36  
    37  // func subVV(z, x, y []Word) (c Word)
    38  // Subtraction counterpart of addVV: SBC instead of ADC, borrow instead of carry.
    39  // subVV sets z[i] = x[i] - y[i] - borrow for every i < len(z) and returns the final borrow (0 or 1) in c.
    40  // On ARM a set C flag means "no borrow", so the loop starts with C = 1 and c is the inverse of the final C.
    41  // The loop test uses TEQ because it leaves C untouched.
    42  TEXT ·subVV(SB),7,$0
    43  	SUB.S	$0, R0	// set the carry flag, i.e. start with no borrow
    44  	MOVW	z+0(FP), R1	// R1 = write cursor into z
    45  	MOVW	x+12(FP), R2	// R2 = read cursor into x
    46  	MOVW	y+24(FP), R3	// R3 = read cursor into y
    47  	MOVW	z_len+4(FP), R4
    48  	MOVW	R4<<2, R4	// len(z) in bytes
    49  	ADD	R1, R4	// R4 = address just past the last word of z
    50  	B E2
    51  L2:
    52  	MOVW.P	4(R2), R5	// R5 = next x word, post-increment
    53  	MOVW.P	4(R3), R6	// R6 = next y word, post-increment
    54  	SBC.S	R6, R5	// R5 = R5 - R6 - borrow; C cleared on borrow out
    55  	MOVW.P	R5, 4(R1)	// store the difference word, post-increment
    56  E2:
    57  	TEQ	R1, R4	// Z set once the cursor reaches the end; C is preserved
    58  	BNE L2
    59  
    60  	MOVW	$0, R0
    61  	MOVW.CC	$1, R0	// c = 1 iff the last subtraction borrowed (C clear)
    62  	MOVW	R0, c+36(FP)
    63  	RET
    64  
    65  
    66  // func addVW(z, x []Word, y Word) (c Word)
    67  // addVW sets z = x + y, adding the single word y at z[0] and rippling the carry upward; c is the final carry (0 or 1).
    68  // For empty z it returns c = y. Only the first word uses ADD; later words just absorb the carry with ADC $0.
    69  // The carry stays in the C flag across iterations: the loop test uses TEQ because it leaves C untouched.
    70  TEXT ·addVW(SB),7,$0
    71  	MOVW	z+0(FP), R1	// R1 = write cursor into z
    72  	MOVW	x+12(FP), R2	// R2 = read cursor into x
    73  	MOVW	y+24(FP), R3	// R3 = y
    74  	MOVW	z_len+4(FP), R4
    75  	MOVW	R4<<2, R4	// len(z) in bytes
    76  	ADD	R1, R4	// R4 = address just past the last word of z
    77  	CMP	R1, R4
    78  	BNE L3a
    79  	MOVW	R3, c+28(FP)	// empty z: y itself is the carry
    80  	RET
    81  L3a:
    82  	MOVW.P	4(R2), R5	// first word of x
    83  	ADD.S	R3, R5	// R5 += y; C = carry out
    84  	MOVW.P	R5, 4(R1)
    85  	B	E3
    86  L3:
    87  	MOVW.P	4(R2), R5	// next x word, post-increment
    88  	ADC.S	$0, R5	// propagate the carry
    89  	MOVW.P	R5, 4(R1)
    90  E3:
    91  	TEQ	R1, R4	// Z set once the cursor reaches the end; C is preserved
    92  	BNE	L3
    93  
    94  	MOVW	$0, R0
    95  	MOVW.CS	$1, R0	// c = 1 iff the last addition carried out
    96  	MOVW	R0, c+28(FP)
    97  	RET
    98  
    99  
   100  // func subVW(z, x []Word, y Word) (c Word)
   101  // subVW sets z = x - y, subtracting the single word y at z[0] and rippling the borrow upward; c is the final borrow (0 or 1).
   102  // For empty z it returns c = y. Only the first word uses SUB; later words just absorb the borrow with SBC $0.
   103  // On ARM a set C flag means "no borrow", so c is the inverse of the final C.
   104  // The borrow stays in the C flag across iterations: the loop test uses TEQ because it leaves C untouched.
   105  TEXT ·subVW(SB),7,$0
   106  	MOVW	z+0(FP), R1	// R1 = write cursor into z
   107  	MOVW	x+12(FP), R2	// R2 = read cursor into x
   108  	MOVW	y+24(FP), R3	// R3 = y
   109  	MOVW	z_len+4(FP), R4
   110  	MOVW	R4<<2, R4	// len(z) in bytes
   111  	ADD	R1, R4	// R4 = address just past the last word of z
   112  	CMP	R1, R4
   113  	BNE L4a
   114  	MOVW	R3, c+28(FP)	// empty z: y itself is the borrow
   115  	RET
   116  L4a:
   117  	MOVW.P	4(R2), R5	// first word of x
   118  	SUB.S	R3, R5	// R5 -= y; C cleared on borrow out
   119  	MOVW.P	R5, 4(R1)
   120  	B	E4
   121  L4:
   122  	MOVW.P	4(R2), R5	// next x word, post-increment
   123  	SBC.S	$0, R5	// propagate the borrow
   124  	MOVW.P	R5, 4(R1)
   125  E4:
   126  	TEQ	R1, R4	// Z set once the cursor reaches the end; C is preserved
   127  	BNE	L4
   128  
   129  	MOVW	$0, R0
   130  	MOVW.CC	$1, R0	// c = 1 iff the last subtraction borrowed (C clear)
   131  	MOVW	R0, c+28(FP)
   132  	RET
   133  
   134  // shlVU sets z = x << s, working from the top word down, and returns in c the bits shifted out of the top word. NOTE(review): the shift path assumes 0 < s < 32 - confirm with callers.
   135  // func shlVU(z, x []Word, s uint) (c Word)
   136  TEXT ·shlVU(SB),7,$0
   137  	MOVW	z_len+4(FP), R5
   138  	CMP	$0, R5
   139  	BEQ	X7	// empty z: nothing to do, return c = 0
   140  	
   141  	MOVW	z+0(FP), R1	// R1 = address of z[0]
   142  	MOVW	x+12(FP), R2	// R2 = address of x[0]
   143  	MOVW	R5<<2, R5	// len(z) in bytes
   144  	ADD	R5, R2	// R2 = just past the last x word that is read
   145  	ADD	R1, R5	// R5 = just past the last word of z
   146  	MOVW	s+24(FP), R3
   147  	CMP	$0, R3	// shift 0 is special
   148  	BEQ	Y7
   149  	ADD	$4, R1	// stop one word early
   150  	MOVW	$32, R4
   151  	SUB	R3, R4	// R4 = 32 - s, the complementary shift count
   152  	MOVW	$0, R7
   153  	
   154  	MOVW.W	-4(R2), R6	// top word of x (R2 is pre-decremented)
   155  	MOVW	R6<<R3, R7	// R7 = high part of the top z word, still missing bits from the word below
   156  	MOVW	R6>>R4, R6
   157  	MOVW	R6, c+28(FP)	// c = bits shifted out of the top word
   158  	B E7
   159  
   160  L7:
   161  	MOVW.W	-4(R2), R6	// next lower word of x
   162  	ORR	R6>>R4, R7	// its top bits complete the pending word
   163  	MOVW.W	R7, -4(R5)	// store the completed word (R5 is pre-decremented)
   164  	MOVW	R6<<R3, R7	// start the next lower z word
   165  E7:
   166  	CMP	R1, R5	// loop until R5 has come down to the address of z[1]
   167  	BNE	L7
   168  
   169  	MOVW	R7, -4(R5)	// lowest word: z[0] = x[0] << s, no writeback needed
   170  	RET
   171  
   172  Y7:	// copy loop, because shift 0 == shift 32
   173  	MOVW.W	-4(R2), R6	// copy x to z unchanged, from the top word down
   174  	MOVW.W	R6, -4(R5)
   175  	CMP	R1, R5	// R1 still holds the address of z[0] on this path
   176  	BNE Y7
   177  
   178  X7:
   179  	MOVW	$0, R1
   180  	MOVW	R1, c+28(FP)	// nothing was shifted out: c = 0
   181  	RET
   182  
   183  // shrVU sets z = x >> s, working from the bottom word up, and returns in c the bits shifted out of x[0], left-aligned (x[0] << (32-s)). NOTE(review): the shift path assumes 0 < s < 32 - confirm with callers.
   184  // func shrVU(z, x []Word, s uint) (c Word)
   185  TEXT ·shrVU(SB),7,$0
   186  	MOVW	z_len+4(FP), R5
   187  	CMP	$0, R5
   188  	BEQ	X6	// empty z: nothing to do, return c = 0
   189  
   190  	MOVW	z+0(FP), R1	// R1 = write cursor, starts at z[0]
   191  	MOVW	x+12(FP), R2	// R2 = read cursor, starts at x[0]
   192  	MOVW	R5<<2, R5	// len(z) in bytes
   193  	ADD	R1, R5	// R5 = just past the last word of z
   194  	MOVW	s+24(FP), R3
   195  	CMP	$0, R3	// shift 0 is special
   196  	BEQ Y6
   197  	SUB	$4, R5	// stop one word early
   198  	MOVW	$32, R4
   199  	SUB	R3, R4	// R4 = 32 - s, the complementary shift count
   200  	MOVW	$0, R7
   201  
   202  	// first word
   203  	MOVW.P	4(R2), R6	// R6 = x[0], post-increment
   204  	MOVW	R6>>R3, R7	// R7 = low part of z[0], still missing bits from the word above
   205  	MOVW	R6<<R4, R6
   206  	MOVW	R6, c+28(FP)	// c = bits shifted out of x[0]
   207  	B E6
   208  
   209  	// word loop
   210  L6:
   211  	MOVW.P	4(R2), R6	// next higher word of x
   212  	ORR	R6<<R4, R7	// its low bits complete the pending word
   213  	MOVW.P	R7, 4(R1)	// store the completed word, post-increment
   214  	MOVW	R6>>R3, R7	// start the next higher z word
   215  E6:
   216  	CMP	R1, R5	// loop until the write cursor reaches the last word of z
   217  	BNE	L6
   218  
   219  	MOVW	R7, 0(R1)	// top word: only the shifted-down bits of the top x word
   220  	RET
   221  
   222  Y6:	// copy loop, because shift 0 == shift 32
   223  	MOVW.P	4(R2), R6	// copy x to z unchanged, from the bottom word up
   224  	MOVW.P	R6, 4(R1)
   225  	CMP R1, R5	// R5 still points just past z on this path
   226  	BNE Y6
   227  
   228  X6:
   229  	MOVW	$0, R1
   230  	MOVW	R1, c+28(FP)	// nothing was shifted out: c = 0
   231  	RET
   232  
   233  // mulAddVWW sets z[i] to the low word of x[i]*y + carry for every i < len(z), starting with carry = r, and returns the final carry in c.
   234  // func mulAddVWW(z, x []Word, y, r Word) (c Word)
   235  TEXT ·mulAddVWW(SB),7,$0
   236  	MOVW	z_len+4(FP), R0	// R0 = len(z), turned into the end address below
   237  	MOVW	z+0(FP), R1	// R1 = write cursor into z
   238  	MOVW	x+12(FP), R2	// R2 = read cursor into x
   239  	MOVW	y+24(FP), R3	// R3 = multiplier y
   240  	MOVW	r+28(FP), R4	// R4 = running carry, seeded with r
   241  	MOVW	R0<<2, R0
   242  	ADD	R1, R0	// R0 = address just past the last word of z
   243  	B	test8
   244  
   245  	// one word per pass: (R7:R6) = x[i]*y + carry
   246  loop8:
   247  	MOVW.P	4(R2), R5	// R5 = next x word, post-increment
   248  	MULLU	R5, R3, (R7, R6)	// R7 = high word, R6 = low word of the product
   249  	ADD.S	R4, R6	// add the incoming carry to the low word
   250  	ADC	$0, R7	// and ripple any overflow into the high word
   251  	MOVW.P	R6, 4(R1)	// store the low word, post-increment
   252  	MOVW	R7, R4	// the high word is the next carry
   253  test8:
   254  	CMP	R1, R0
   255  	BNE	loop8
   256  
   257  	MOVW	R4, c+32(FP)
   258  	RET
   259  	// (with carry seeded from r, an empty z returns c = r)
   260  
   261  // addMulVVW sets z[i] to the low word of z[i] + x[i]*y + carry for every i < len(z), starting with carry = 0, and returns the final carry in c.
   262  // func addMulVVW(z, x []Word, y Word) (c Word)
   263  TEXT ·addMulVVW(SB),7,$0
   264  	MOVW	z_len+4(FP), R0	// R0 = len(z), turned into the end address below
   265  	MOVW	z+0(FP), R1	// R1 = read/write cursor into z
   266  	MOVW	x+12(FP), R2	// R2 = read cursor into x
   267  	MOVW	y+24(FP), R3	// R3 = multiplier y
   268  	MOVW	$0, R4	// R4 = running carry, starts at 0
   269  	MOVW	R0<<2, R0
   270  	ADD	R1, R0	// R0 = address just past the last word of z
   271  	B	test9
   272  
   273  	// one word per pass: (R7:R6) = x[i]*y + carry + z[i]
   274  	// (the sum is at most (2^32-1)^2 + 2*(2^32-1) = 2^64-1, so it fits in two words)
   275  loop9:
   276  	MOVW.P	4(R2), R5	// R5 = next x word, post-increment
   277  	MULLU	R5, R3, (R7, R6)	// R7 = high word, R6 = low word of the product
   278  	ADD.S	R4, R6	// add the incoming carry
   279  	ADC	$0, R7
   280  	MOVW	0(R1), R5	// R5 = current z word (cursor not advanced yet)
   281  	ADD.S	R5, R6	// accumulate it as well
   282  	ADC	$0, R7
   283  	MOVW.P	R6, 4(R1)	// write the low word back, post-increment
   284  	MOVW	R7, R4	// the high word is the next carry
   285  test9:
   286  	CMP	R1, R0
   287  	BNE	loop9
   288  
   289  	MOVW	R4, c+28(FP)
   290  	RET
   291  
   292  // divWVW has no ARM-specific implementation here: it branches to divWVW_g, which is defined outside this file.
   293  // func divWVW(z* Word, xn Word, x []Word, y Word) (r Word) -- NOTE(review): "z* Word" is not valid Go; presumably z []Word like the other vector routines - confirm against the Go declaration.
   294  TEXT ·divWVW(SB),7,$0
   295  	// ARM has no multiword division, so use portable code.
   296  	B ·divWVW_g(SB)	// plain branch (no link), so divWVW_g returns directly to divWVW's caller
   297  
   298  // divWW has no ARM-specific implementation here: it branches to divWW_g, which is defined outside this file.
   299  // func divWW(x1, x0, y Word) (q, r Word)
   300  TEXT ·divWW(SB),7,$0
   301  	// ARM has no multiword division, so use portable code.
   302  	B ·divWW_g(SB)	// plain branch (no link), so divWW_g returns directly to divWW's caller
   303  
   304  // mulWW returns the full two-word unsigned product x*y: z1 receives the high word and z0 the low word.
   305  // func mulWW(x, y Word) (z1, z0 Word)
   306  TEXT ·mulWW(SB),7,$0
   307  	MOVW	y+4(FP), R0	// R0 = y
   308  	MOVW	x+0(FP), R1	// R1 = x
   309  	MULLU	R0, R1, (R3, R2)	// R3 = high word, R2 = low word of the product
   310  	MOVW	R2, z0+12(FP)
   311  	MOVW	R3, z1+8(FP)
   312  	RET
   313  
   314  // func bitLen(x Word) (n int)
   315  // bitLen returns 32 minus the number of leading zero bits in x, i.e. the minimum bits needed to represent x (0 for x == 0).
   316  TEXT ·bitLen(SB),7,$0
   317  	MOVW	x+0(FP), R2
   318  	CLZ 	R2, R2	// R2 = count of leading zero bits
   319  	RSB	$32, R2	// R2 = 32 - R2
   320  	MOVW	R2, n+4(FP)
   321  	RET