github.com/flyinox/gosm@v0.0.0-20171117061539-16768cb62077/src/math/big/arith_arm64.s (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !math_big_pure_go
     6  
     7  #include "textflag.h"
     8  
     9  // This file provides fast assembly versions for the elementary
    10  // arithmetic operations on vectors implemented in arith.go.
    11  
    12  // TODO: Consider re-implementing using Advanced SIMD
    13  // once the assembler supports those instructions.
    14  
    15  // func mulWW(x, y Word) (z1, z0 Word)
        // Full-width multiply of two 64-bit words: z1 receives the upper half of
        // the 128-bit product and z0 the lower half.
    16  TEXT ·mulWW(SB),NOSPLIT,$0
    17  	MOVD	y+8(FP), R5
    18  	MOVD	x+0(FP), R4
    19  	UMULH	R4, R5, R6 // R6 = high 64 bits of x*y (unsigned)
    20  	MUL	R4, R5, R7 // R7 = low 64 bits of x*y
    21  	MOVD	R7, z0+24(FP)
    22  	MOVD	R6, z1+16(FP)
    23  	RET
    24  
    25  
    26  // func divWW(x1, x0, y Word) (q, r Word)
        // No assembly implementation: control is handed straight to divWW_g, which
        // is defined outside this file (presumably the generic Go version in
        // arith.go; confirm). B does not link, and the frame size is $0.
    27  TEXT ·divWW(SB),NOSPLIT,$0
    28  	B	·divWW_g(SB) // ARM64 has no multiword division
    29  
    30  
    31  // func addVV(z, x, y []Word) (c Word)
        // Word-wise z = x + y over z_len words with carry propagation; c is the
        // final carry out (0 or 1). Only z's length is consulted; x and y are read
        // for the same number of words.
    32  TEXT ·addVV(SB),NOSPLIT,$0
    33  	MOVD	z_len+8(FP), R4 // R4 = words left to process
    34  	MOVD	z+0(FP), R5 // R5 = write cursor into z
    35  	MOVD	y+48(FP), R7 // R7 = read cursor into y
    36  	MOVD	x+24(FP), R6 // R6 = read cursor into x
    37  	ADDS	$0, R4 // adding zero cannot carry: carry flag = 0, R4 unchanged
    38  	CBZ	R4, finish // nothing to do; CBZ does not alter flags
    39  next:
    40  	MOVD.P	8(R7), R9 // R9 = next word of y
    41  	MOVD.P	8(R6), R8 // R8 = next word of x
    42  	ADCS	R8, R9 // R9 = x word + y word + carry; carry updated
    43  	MOVD.P	R9, 8(R5) // store sum, advance z
    44  	SUB	$1, R4 // flag-preserving decrement
    45  	CBNZ	R4, next
    46  finish:
    47  	CSET	HS, R4 // R4 = 1 if carry set, else 0
    48  	MOVD	R4, c+72(FP)
    49  	RET
    50  
    51  
    52  // func subVV(z, x, y []Word) (c Word)
        // Word-wise z = x - y over z_len words with borrow propagation; c is the
        // final borrow (1 if a borrow is outstanding, else 0). Only z's length is
        // consulted; x and y are read for the same number of words.
    53  TEXT ·subVV(SB),NOSPLIT,$0
    54  	MOVD	z_len+8(FP), R4 // R4 = words left to process
    55  	MOVD	z+0(FP), R5 // R5 = write cursor into z
    56  	MOVD	y+48(FP), R7 // R7 = read cursor into y
    57  	MOVD	x+24(FP), R6 // R6 = read cursor into x
    58  	CMP	R4, R4 // equal operands: carry = 1, i.e. no borrow pending
    59  	CBZ	R4, finish // nothing to do; CBZ does not alter flags
    60  next:
    61  	MOVD.P	8(R7), R9 // R9 = next word of y
    62  	MOVD.P	8(R6), R8 // R8 = next word of x
    63  	SBCS	R9, R8 // R8 = x word - y word - borrow; carry updated
    64  	MOVD.P	R8, 8(R5) // store difference, advance z
    65  	SUB	$1, R4 // flag-preserving decrement
    66  	CBNZ	R4, next
    67  finish:
    68  	CSET	LO, R4 // carry clear means a borrow is outstanding
    69  	MOVD	R4, c+72(FP)
    70  	RET
    71  
    72  
    73  // func addVW(z, x []Word, y Word) (c Word)
        // Adds the single word y to the vector x, writing z_len words to z and
        // rippling the carry upward; c is the final carry. With an empty z the
        // result c is y itself.
    74  TEXT ·addVW(SB),NOSPLIT,$0
    75  	MOVD	y+48(FP), R7 // R7 = addend word
    76  	MOVD	z_len+8(FP), R4 // R4 = words left to process
    77  	MOVD	z+0(FP), R5 // R5 = write cursor into z
    78  	MOVD	x+24(FP), R6 // R6 = read cursor into x
    79  	CBZ	R4, empty
    80  	MOVD.P	8(R6), R8
    81  	ADDS	R7, R8 // lowest word: x[0] + y, establishes the carry
    82  	MOVD.P	R8, 8(R5)
    83  	SUB	$1, R4
    84  	CBZ	R4, finish // single-word vector; CBZ does not alter flags
    85  next:
    86  	MOVD.P	8(R6), R8
    87  	ADCS	$0, R8 // propagate the carry through the next word
    88  	MOVD.P	R8, 8(R5)
    89  	SUB	$1, R4 // flag-preserving decrement
    90  	CBNZ	R4, next
    91  finish:
    92  	CSET	HS, R4 // R4 = 1 if carry set, else 0
    93  	MOVD	R4, c+56(FP)
    94  	RET
    95  empty: // no words in z: hand y back as the carry
    96  	MOVD	R7, c+56(FP)
    97  	RET
    98  
    99  
   100  // func subVW(z, x []Word, y Word) (c Word)
        // Subtracts the single word y from the vector x, writing z_len words to z
        // and rippling the borrow upward; c is the final borrow. With an empty z
        // the result c is y itself.
   101  TEXT ·subVW(SB),NOSPLIT,$0
   102  	MOVD	y+48(FP), R7 // R7 = subtrahend word
   103  	MOVD	z_len+8(FP), R4 // R4 = words left to process
   104  	MOVD	z+0(FP), R5 // R5 = write cursor into z
   105  	MOVD	x+24(FP), R6 // R6 = read cursor into x
   106  	CBZ	R4, empty
   107  	MOVD.P	8(R6), R8
   108  	SUBS	R7, R8 // lowest word: x[0] - y, establishes the borrow state
   109  	MOVD.P	R8, 8(R5)
   110  	SUB	$1, R4
   111  	CBZ	R4, finish // single-word vector; CBZ does not alter flags
   112  next:
   113  	MOVD.P	8(R6), R8
   114  	SBCS	$0, R8 // propagate the borrow through the next word
   115  	MOVD.P	R8, 8(R5)
   116  	SUB	$1, R4 // flag-preserving decrement
   117  	CBNZ	R4, next
   118  finish:
   119  	CSET	LO, R4 // carry clear means a borrow is outstanding
   120  	MOVD	R4, c+56(FP)
   121  	RET
   122  empty: // no words in z: hand y back as the borrow
   123  	MOVD	R7, c+56(FP)
   124  	RET
   125  
   126  
   127  // func shlVU(z, x []Word, s uint) (c Word)
        // No assembly implementation: control is handed straight to shlVU_g, which
        // is defined outside this file (presumably the generic Go version in
        // arith.go; confirm).
   128  TEXT ·shlVU(SB),NOSPLIT,$0
   129  	B ·shlVU_g(SB) // branch without link; frame size is $0
   130  
   131  
   132  // func shrVU(z, x []Word, s uint) (c Word)
        // No assembly implementation: control is handed straight to shrVU_g, which
        // is defined outside this file (presumably the generic Go version in
        // arith.go; confirm).
   133  TEXT ·shrVU(SB),NOSPLIT,$0
   134  	B ·shrVU_g(SB) // branch without link; frame size is $0
   135  
   136  
   137  // func mulAddVWW(z, x []Word, y, r Word) (c Word)
        // For each of z_len words computes the 128-bit value x[i]*y + carry-in,
        // stores its low word to z[i] and passes its high word on as the next
        // carry-in. The first carry-in is r; c is the last carry-out (r itself
        // when z is empty).
   138  TEXT ·mulAddVWW(SB),NOSPLIT,$0
   139  	MOVD	z_len+8(FP), R4 // R4 = words left to process
   140  	MOVD	z+0(FP), R5 // R5 = write cursor into z
   141  	MOVD	x+24(FP), R6 // R6 = read cursor into x
   142  	MOVD	r+56(FP), R8 // R8 = running carry word, seeded with r
   143  	MOVD	y+48(FP), R7 // R7 = multiplier
   144  	CBZ	R4, finish
   145  next:
   146  	MOVD.P	8(R6), R9 // R9 = next word of x
   147  	MUL	R9, R7, R10 // R10 = low half of x word * y
   148  	UMULH	R9, R7, R11 // R11 = high half of x word * y
   149  	ADDS	R8, R10 // low half += carry word; sets carry flag
   150  	ADC	$0, R11 // fold that carry into the high half
   151  	MOVD.P	R10, 8(R5) // store low half, advance z
   152  	MOVD	R11, R8 // high half becomes the next carry word
   153  	SUB	$1, R4
   154  	CBNZ	R4, next
   155  finish:
   156  	MOVD	R8, c+64(FP)
   157  	RET
   158  
   159  
   160  // func addMulVVW(z, x []Word, y Word) (c Word)
        // No assembly implementation: control is handed straight to addMulVVW_g,
        // which is defined outside this file (presumably the generic Go version in
        // arith.go; confirm).
   161  TEXT ·addMulVVW(SB),NOSPLIT,$0
   162  	B ·addMulVVW_g(SB) // branch without link; frame size is $0
   163  
   164  
   165  // func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
        // No assembly implementation: control is handed straight to divWVW_g, which
        // is defined outside this file (presumably the generic Go version in
        // arith.go; confirm).
   166  TEXT ·divWVW(SB),NOSPLIT,$0
   167  	B ·divWVW_g(SB) // branch without link; frame size is $0