github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/math/big/arith_arm.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !math_big_pure_go
     6  // +build !math_big_pure_go
     7  
     8  #include "textflag.h"
     9  
    10  // This file provides fast assembly versions for the elementary
    11  // arithmetic operations on vectors implemented in arith.go.
    12  
     13  // func addVV(z, x, y []Word) (c Word)
         //
         // addVV stores x[i] + y[i] + carry-in in z[i] for every word of z and
         // returns the carry out of the last addition (0 or 1) in c.
         // Only z_len bounds the loop; x and y are read for z_len words
         // (presumably callers guarantee both are at least that long - verify).
         //
         // Register use: R1 = z cursor, R2 = x cursor, R3 = y cursor,
         // R4 = address one past the last word of z, R5/R6 = current words,
         // R0 = result. The running carry lives in the CPU carry flag.
     14  TEXT ·addVV(SB),NOSPLIT,$0
     15  	ADD.S	$0, R0		// clear carry flag (adding 0 never carries; R0 keeps its value)
     16  	MOVW	z+0(FP), R1	// R1 = &z[0]
     17  	MOVW	z_len+4(FP), R4	// R4 = len(z)
     18  	MOVW	x+12(FP), R2	// R2 = &x[0]
     19  	MOVW	y+24(FP), R3	// R3 = &y[0]
     20  	ADD	R4<<2, R1, R4	// R4 = R1 + 4*len(z): end-of-z sentinel
     21  	B E1	// enter at the loop test so len(z) == 0 does no work
     22  L1:
     23  	MOVW.P	4(R2), R5	// R5 = *x, then x cursor += 4
     24  	MOVW.P	4(R3), R6	// R6 = *y, then y cursor += 4
     25  	ADC.S	R6, R5	// R5 = R5 + R6 + carry; updates the carry flag
     26  	MOVW.P	R5, 4(R1)	// *z = R5, then z cursor += 4
     27  E1:
     28  	TEQ	R1, R4	// equality test only; leaves the carry flag intact for the next ADC.S
     29  	BNE L1	// loop until the z cursor reaches the sentinel
     30  
     31  	MOVW	$0, R0	// c = 0 ...
     32  	MOVW.CS	$1, R0	// ... unless the carry flag is set, then c = 1
     33  	MOVW	R0, c+36(FP)	// store the result
     34  	RET
    35  
    36  
     37  // func subVV(z, x, y []Word) (c Word)
     38  // (same as addVV except for SBC instead of ADC and label names)
         //
         // subVV stores x[i] - y[i] - borrow-in in z[i] for every word of z and
         // returns the borrow out of the last subtraction (0 or 1) in c.
         // On ARM a subtraction sets the carry flag when NO borrow occurred, so
         // "no borrow" is carry set and the final result uses the CC condition.
         // Only z_len bounds the loop; x and y are read for z_len words
         // (presumably callers guarantee both are at least that long - verify).
         //
         // Register use: R1 = z cursor, R2 = x cursor, R3 = y cursor,
         // R4 = address one past the last word of z, R5/R6 = current words,
         // R0 = result.
     39  TEXT ·subVV(SB),NOSPLIT,$0
     40  	SUB.S	$0, R0		// clear borrow flag (subtracting 0 never borrows, so carry is set)
     41  	MOVW	z+0(FP), R1	// R1 = &z[0]
     42  	MOVW	z_len+4(FP), R4	// R4 = len(z)
     43  	MOVW	x+12(FP), R2	// R2 = &x[0]
     44  	MOVW	y+24(FP), R3	// R3 = &y[0]
     45  	ADD	R4<<2, R1, R4	// R4 = R1 + 4*len(z): end-of-z sentinel
     46  	B E2	// enter at the loop test so len(z) == 0 does no work
     47  L2:
     48  	MOVW.P	4(R2), R5	// R5 = *x, then x cursor += 4
     49  	MOVW.P	4(R3), R6	// R6 = *y, then y cursor += 4
     50  	SBC.S	R6, R5	// R5 = R5 - R6 - borrow; updates the carry flag
     51  	MOVW.P	R5, 4(R1)	// *z = R5, then z cursor += 4
     52  E2:
     53  	TEQ	R1, R4	// equality test only; leaves the carry flag intact for the next SBC.S
     54  	BNE L2	// loop until the z cursor reaches the sentinel
     55  
     56  	MOVW	$0, R0	// c = 0 ...
     57  	MOVW.CC	$1, R0	// ... unless carry is clear (a borrow occurred), then c = 1
     58  	MOVW	R0, c+36(FP)	// store the result
     59  	RET
    60  
    61  
     62  // func addVW(z, x []Word, y Word) (c Word)
         //
         // addVW adds the single word y to the multi-word value x, writing the
         // sum to z: z[0] = x[0] + y, and each later z[i] = x[i] + carry.
         // It returns the final carry (0 or 1) in c. When len(z) == 0 nothing is
         // written and c is set to y itself.
         // Only z_len bounds the loop; x is read for z_len words
         // (presumably callers guarantee it is at least that long - verify).
         //
         // Register use: R1 = z cursor, R2 = x cursor, R3 = y,
         // R4 = address one past the last word of z, R5 = current word, R0 = result.
     63  TEXT ·addVW(SB),NOSPLIT,$0
     64  	MOVW	z+0(FP), R1	// R1 = &z[0]
     65  	MOVW	z_len+4(FP), R4	// R4 = len(z)
     66  	MOVW	x+12(FP), R2	// R2 = &x[0]
     67  	MOVW	y+24(FP), R3	// R3 = y
     68  	ADD	R4<<2, R1, R4	// R4 = R1 + 4*len(z): end-of-z sentinel
     69  	TEQ	R1, R4	// is z empty?
     70  	BNE L3a	// no: go add y into the first word
     71  	MOVW	R3, c+28(FP)	// empty z: the whole of y is returned as the carry
     72  	RET
     73  L3a:
     74  	MOVW.P	4(R2), R5	// R5 = x[0], then x cursor += 4
     75  	ADD.S	R3, R5	// R5 += y; this sets the initial carry flag
     76  	MOVW.P	R5, 4(R1)	// z[0] = R5, then z cursor += 4
     77  	B	E3	// remaining words only propagate the carry
     78  L3:
     79  	MOVW.P	4(R2), R5	// R5 = *x, then x cursor += 4
     80  	ADC.S	$0, R5	// R5 += carry; updates the carry flag
     81  	MOVW.P	R5, 4(R1)	// *z = R5, then z cursor += 4
     82  E3:
     83  	TEQ	R1, R4	// equality test only; leaves the carry flag intact for the next ADC.S
     84  	BNE	L3	// loop until the z cursor reaches the sentinel
     85  
     86  	MOVW	$0, R0	// c = 0 ...
     87  	MOVW.CS	$1, R0	// ... unless the carry flag is set, then c = 1
     88  	MOVW	R0, c+28(FP)	// store the result
     89  	RET
    90  
    91  
     92  // func subVW(z, x []Word, y Word) (c Word)
         //
         // subVW subtracts the single word y from the multi-word value x, writing
         // the difference to z: z[0] = x[0] - y, and each later z[i] = x[i] - borrow.
         // It returns the final borrow (0 or 1) in c. When len(z) == 0 nothing is
         // written and c is set to y itself.
         // On ARM a subtraction sets the carry flag when NO borrow occurred, which
         // is why the result is produced with the CC (carry clear) condition.
         // Only z_len bounds the loop; x is read for z_len words
         // (presumably callers guarantee it is at least that long - verify).
         //
         // Register use: R1 = z cursor, R2 = x cursor, R3 = y,
         // R4 = address one past the last word of z, R5 = current word, R0 = result.
     93  TEXT ·subVW(SB),NOSPLIT,$0
     94  	MOVW	z+0(FP), R1	// R1 = &z[0]
     95  	MOVW	z_len+4(FP), R4	// R4 = len(z)
     96  	MOVW	x+12(FP), R2	// R2 = &x[0]
     97  	MOVW	y+24(FP), R3	// R3 = y
     98  	ADD	R4<<2, R1, R4	// R4 = R1 + 4*len(z): end-of-z sentinel
     99  	TEQ	R1, R4	// is z empty?
    100  	BNE L4a	// no: go subtract y from the first word
    101  	MOVW	R3, c+28(FP)	// empty z: the whole of y is returned as the borrow
    102  	RET
    103  L4a:
    104  	MOVW.P	4(R2), R5	// R5 = x[0], then x cursor += 4
    105  	SUB.S	R3, R5	// R5 -= y; this sets the initial carry (not-borrow) flag
    106  	MOVW.P	R5, 4(R1)	// z[0] = R5, then z cursor += 4
    107  	B	E4	// remaining words only propagate the borrow
    108  L4:
    109  	MOVW.P	4(R2), R5	// R5 = *x, then x cursor += 4
    110  	SBC.S	$0, R5	// R5 -= borrow; updates the carry flag
    111  	MOVW.P	R5, 4(R1)	// *z = R5, then z cursor += 4
    112  E4:
    113  	TEQ	R1, R4	// equality test only; leaves the carry flag intact for the next SBC.S
    114  	BNE	L4	// loop until the z cursor reaches the sentinel
    115  
    116  	MOVW	$0, R0	// c = 0 ...
    117  	MOVW.CC	$1, R0	// ... unless carry is clear (a borrow occurred), then c = 1
    118  	MOVW	R0, c+28(FP)	// store the result
    119  	RET
   120  
   121  
    122  // func shlVU(z, x []Word, s uint) (c Word)
         //
         // shlVU shifts the multi-word value x left by s bits into z and returns
         // in c the bits shifted out of the most significant word.
         // The vectors are walked from the highest word down to word 0.
         // Special cases visible below: len(z) == 0 returns c = 0 without touching
         // memory, and s == 0 copies x to z word by word and returns c = 0.
         // The shift amounts s and 32-s are used as register shifts
         // (presumably callers keep s below 32 - verify).
         //
         // Register use: R1 = stop address for the z cursor, R2 = x cursor
         // (starts one past the last word), R5 = z cursor (starts one past the last
         // word), R3 = s, R4 = 32-s, R6 = current x word,
         // R7 = partially assembled output word.
    123  TEXT ·shlVU(SB),NOSPLIT,$0
    124  	MOVW	z_len+4(FP), R5	// R5 = len(z)
    125  	TEQ	$0, R5	// empty vector?
    126  	BEQ	X7	// yes: just return c = 0
    127  
    128  	MOVW	z+0(FP), R1	// R1 = &z[0]
    129  	MOVW	x+12(FP), R2	// R2 = &x[0]
    130  	ADD	R5<<2, R2, R2	// R2 = one past the last word of x
    131  	ADD	R5<<2, R1, R5	// R5 = one past the last word of z
    132  	MOVW	s+24(FP), R3	// R3 = s
    133  	TEQ	$0, R3	// shift 0 is special
    134  	BEQ	Y7	// s == 0: take the plain copy loop
    135  	ADD	$4, R1	// stop one word early
    136  	MOVW	$32, R4
    137  	SUB	R3, R4	// R4 = 32 - s
    138  	MOVW	$0, R7	// (R7 is overwritten before its first use below)
    139  
    140  	MOVW.W	-4(R2), R6	// step the x cursor down, R6 = top word of x
    141  	MOVW	R6<<R3, R7	// R7 = high part of the top output word
    142  	MOVW	R6>>R4, R6	// R6 = bits shifted out of the top word
    143  	MOVW	R6, c+28(FP)	// those bits are the result c
    144  	B E7	// enter the loop at its test
    145  
    146  L7:
    147  	MOVW.W	-4(R2), R6	// step the x cursor down, R6 = next lower word of x
    148  	ORR	R6>>R4, R7	// fill the low bits of the pending word from R6's top bits
    149  	MOVW.W	R7, -4(R5)	// step the z cursor down and store the finished word
    150  	MOVW	R6<<R3, R7	// start the next output word with R6's remaining bits
    151  E7:
    152  	TEQ	R1, R5	// has the z cursor reached &z[1]?
    153  	BNE	L7
    154  
    155  	MOVW	R7, -4(R5)	// store the last pending word into z[0] (low bits are zero)
    156  	RET
    157  
    158  Y7:	// copy loop, because shift 0 == shift 32
    159  	MOVW.W	-4(R2), R6	// step the x cursor down and load
    160  	MOVW.W	R6, -4(R5)	// step the z cursor down and store
    161  	TEQ	R1, R5	// reached &z[0]?
    162  	BNE Y7
    163  
    164  X7:
    165  	MOVW	$0, R1
    166  	MOVW	R1, c+28(FP)	// c = 0: nothing was shifted out
    167  	RET
   168  
   169  
    170  // func shrVU(z, x []Word, s uint) (c Word)
         //
         // shrVU shifts the multi-word value x right by s bits into z and returns
         // in c the bits shifted out of the least significant word, placed in the
         // high end of c (x[0] << (32-s)).
         // The vectors are walked from word 0 upwards.
         // Special cases visible below: len(z) == 0 returns c = 0 without touching
         // memory, and s == 0 copies x to z word by word and returns c = 0.
         // The shift amounts s and 32-s are used as register shifts
         // (presumably callers keep s below 32 - verify).
         //
         // Register use: R1 = z cursor, R2 = x cursor, R5 = stop address for the
         // z cursor, R3 = s, R4 = 32-s, R6 = current x word,
         // R7 = partially assembled output word.
    171  TEXT ·shrVU(SB),NOSPLIT,$0
    172  	MOVW	z_len+4(FP), R5	// R5 = len(z)
    173  	TEQ	$0, R5	// empty vector?
    174  	BEQ	X6	// yes: just return c = 0
    175  
    176  	MOVW	z+0(FP), R1	// R1 = &z[0]
    177  	MOVW	x+12(FP), R2	// R2 = &x[0]
    178  	ADD	R5<<2, R1, R5	// R5 = one past the last word of z
    179  	MOVW	s+24(FP), R3	// R3 = s
    180  	TEQ	$0, R3	// shift 0 is special
    181  	BEQ Y6	// s == 0: take the plain copy loop
    182  	SUB	$4, R5	// stop one word early
    183  	MOVW	$32, R4
    184  	SUB	R3, R4	// R4 = 32 - s
    185  	MOVW	$0, R7	// (R7 is overwritten before its first use below)
    186  
    187  	// first word
    188  	MOVW.P	4(R2), R6	// R6 = x[0], then x cursor += 4
    189  	MOVW	R6>>R3, R7	// R7 = low part of the first output word
    190  	MOVW	R6<<R4, R6	// R6 = bits shifted out of x[0], moved to the top of the word
    191  	MOVW	R6, c+28(FP)	// those bits are the result c
    192  	B E6	// enter the loop at its test
    193  
    194  	// word loop
    195  L6:
    196  	MOVW.P	4(R2), R6	// R6 = next higher word of x, then x cursor += 4
    197  	ORR	R6<<R4, R7	// fill the high bits of the pending word from R6's low bits
    198  	MOVW.P	R7, 4(R1)	// store the finished word, then z cursor += 4
    199  	MOVW	R6>>R3, R7	// start the next output word with R6's remaining bits
    200  E6:
    201  	TEQ	R1, R5	// has the z cursor reached the last word of z?
    202  	BNE	L6
    203  
    204  	MOVW	R7, 0(R1)	// store the last pending word (high bits are zero)
    205  	RET
    206  
    207  Y6:	// copy loop, because shift 0 == shift 32
    208  	MOVW.P	4(R2), R6	// load, then x cursor += 4
    209  	MOVW.P	R6, 4(R1)	// store, then z cursor += 4
    210  	TEQ R1, R5	// reached the end of z?
    211  	BNE Y6
    212  
    213  X6:
    214  	MOVW	$0, R1
    215  	MOVW	R1, c+28(FP)	// c = 0: nothing was shifted out
    216  	RET
   217  
   218  
    219  // func mulAddVWW(z, x []Word, y, r Word) (c Word)
         //
         // mulAddVWW computes z = x*y + r one word at a time: for each word,
         // the 64-bit product x[i]*y plus the running carry word is formed; its low
         // half is stored in z[i] and its high half becomes the carry for the next
         // word. The carry starts as r and its final value is returned in c
         // (so len(z) == 0 returns c = r).
         // Only z_len bounds the loop; x is read for z_len words
         // (presumably callers guarantee it is at least that long - verify).
         //
         // Register use: R0 = constant 0 (operand for ADC), R1 = z cursor,
         // R2 = x cursor, R3 = y, R4 = running carry word,
         // R5 = address one past the last word of z, R7:R6 = 64-bit product.
    220  TEXT ·mulAddVWW(SB),NOSPLIT,$0
    221  	MOVW	$0, R0	// R0 = 0, lets ADC add just the carry flag
    222  	MOVW	z+0(FP), R1	// R1 = &z[0]
    223  	MOVW	z_len+4(FP), R5	// R5 = len(z)
    224  	MOVW	x+12(FP), R2	// R2 = &x[0]
    225  	MOVW	y+24(FP), R3	// R3 = y
    226  	MOVW	r+28(FP), R4	// R4 = r, the initial carry word
    227  	ADD	R5<<2, R1, R5	// R5 = R1 + 4*len(z): end-of-z sentinel
    228  	B E8	// enter at the loop test so len(z) == 0 does no work
    229  
    230  	// word loop
    231  L8:
    232  	MOVW.P	4(R2), R6	// R6 = *x, then x cursor += 4
    233  	MULLU	R6, R3, (R7, R6)	// R7:R6 = R6 * R3 (unsigned 32x32 -> 64)
    234  	ADD.S	R4, R6	// low half += carry word; sets the carry flag
    235  	ADC	R0, R7	// high half += carry flag
    236  	MOVW.P	R6, 4(R1)	// *z = low half, then z cursor += 4
    237  	MOVW	R7, R4	// high half is the carry word for the next iteration
    238  E8:
    239  	TEQ	R1, R5	// reached the sentinel?
    240  	BNE	L8
    241  
    242  	MOVW	R4, c+32(FP)	// return the final carry word
    243  	RET
   244  
   245  
    246  // func addMulVVW(z, x []Word, y Word) (c Word)
         //
         // addMulVVW computes z += x*y one word at a time: for each word,
         // the 64-bit product x[i]*y plus the running carry word plus the old z[i]
         // is formed; its low half is written back to z[i] and its high half
         // becomes the carry for the next word. The carry starts at 0 and its
         // final value is returned in c.
         // Only z_len bounds the loop; x is read for z_len words
         // (presumably callers guarantee it is at least that long - verify).
         //
         // Register use: R0 = constant 0 (operand for ADC), R1 = z cursor,
         // R2 = x cursor, R3 = y, R4 = running carry word (also scratch for the
         // old z[i]), R5 = address one past the last word of z,
         // R7:R6 = 64-bit accumulator.
    247  TEXT ·addMulVVW(SB),NOSPLIT,$0
    248  	MOVW	$0, R0	// R0 = 0, lets ADC add just the carry flag
    249  	MOVW	z+0(FP), R1	// R1 = &z[0]
    250  	MOVW	z_len+4(FP), R5	// R5 = len(z)
    251  	MOVW	x+12(FP), R2	// R2 = &x[0]
    252  	MOVW	y+24(FP), R3	// R3 = y
    253  	ADD	R5<<2, R1, R5	// R5 = R1 + 4*len(z): end-of-z sentinel
    254  	MOVW	$0, R4	// initial carry word = 0
    255  	B E9	// enter at the loop test so len(z) == 0 does no work
    256  
    257  	// word loop
    258  L9:
    259  	MOVW.P	4(R2), R6	// R6 = *x, then x cursor += 4
    260  	MULLU	R6, R3, (R7, R6)	// R7:R6 = R6 * R3 (unsigned 32x32 -> 64)
    261  	ADD.S	R4, R6	// low half += carry word; sets the carry flag
    262  	ADC	R0, R7	// high half += carry flag
    263  	MOVW	0(R1), R4	// R4 = current z word (the carry word has been consumed)
    264  	ADD.S	R4, R6	// low half += old z word; sets the carry flag
    265  	ADC	R0, R7	// high half += carry flag
    266  	MOVW.P	R6, 4(R1)	// *z = low half, then z cursor += 4
    267  	MOVW	R7, R4	// high half is the carry word for the next iteration
    268  E9:
    269  	TEQ	R1, R5	// reached the sentinel?
    270  	BNE	L9
    271  
    272  	MOVW	R4, c+28(FP)	// return the final carry word
    273  	RET