github.com/ethw3/go-ethereuma@v0.0.0-20221013053120-c14602a4c23c/crypto/bn256/cloudflare/gfp_arm64.s (about)

     1  // +build arm64,!generic
     2  
     3  #define storeBlock(a0,a1,a2,a3, r) /* write registers a0..a3 to four consecutive 64-bit words at memory operand r */ \
     4  	MOVD a0,  0+r /* word 0 */ \
     5  	MOVD a1,  8+r /* word 1 */ \
     6  	MOVD a2, 16+r /* word 2 */ \
     7  	MOVD a3, 24+r // word 3; r is pasted textually after the offset, and callers in this file pass 0(R0)
     8  
     9  #define loadBlock(r, a0,a1,a2,a3) /* read four consecutive 64-bit words at memory operand r into registers a0..a3 */ \
    10  	MOVD  0+r, a0 /* word 0 */ \
    11  	MOVD  8+r, a1 /* word 1 */ \
    12  	MOVD 16+r, a2 /* word 2 */ \
    13  	MOVD 24+r, a3 // word 3; r is pasted textually after the offset, and callers in this file pass 0(R0)
    14  
    15  #define loadModulus(p0,p1,p2,p3) /* load the four 64-bit words stored at the Go symbol ·p2 into registers p0..p3 */ \
    16  	MOVD ·p2+0(SB), p0 /* word 0 */ \
    17  	MOVD ·p2+8(SB), p1 /* word 1 */ \
    18  	MOVD ·p2+16(SB), p2 /* word 2 */ \
    19  	MOVD ·p2+24(SB), p3 // word 3. NOTE(review): macro parameter p2 shares its name with the symbol ·p2; this presumably works because ·p2 is lexed as a single identifier - confirm before renaming either one
    20  
    21  #include "mul_arm64.h"
    22  
    23  TEXT ·gfpNeg(SB),0,$0-16 // gfpNeg: stores the negation of *a modulo ·p2 into *c. Args: pointer c at 0(FP), pointer a at 8(FP); no local frame, 16 bytes of arguments
    24  	MOVD a+8(FP), R0 // R0 = a
    25  	loadBlock(0(R0), R1,R2,R3,R4) // R1..R4 = the four words of *a; R1 is the least-significant word (it starts every carry chain below)
    26  	loadModulus(R5,R6,R7,R8) // R5..R8 = the four words of ·p2, called p below (the modulus, going by the macro name)
    27  	// Step 1: R1..R4 = p - a. Go arm64 operand order: SUBS x, y, z computes z = y - x; SBCS also subtracts the borrow from the previous word.
    28  	SUBS R1, R5, R1
    29  	SBCS R2, R6, R2
    30  	SBCS R3, R7, R3
    31  	SBCS R4, R8, R4
    32  	// Step 2: R5..R8 = (p - a) - p. On arm64 the carry flag is set after a subtraction exactly when there was no borrow, i.e. when p - a >= p.
    33  	SUBS R5, R1, R5
    34  	SBCS R6, R2, R6
    35  	SBCS R7, R3, R7
    36  	SBCS R8, R4, R8
    37  	// Step 3: branch-free select. CSEL cond, x, y, z sets z = x if cond holds, else y. Carry set -> take step 2's result; otherwise keep p - a. Assuming 0 <= a < p (TODO confirm with callers), step 2 wins only for a = 0, giving 0 instead of p.
    38  	CSEL CS, R5, R1, R1
    39  	CSEL CS, R6, R2, R2
    40  	CSEL CS, R7, R3, R3
    41  	CSEL CS, R8, R4, R4
    42  	// Write the selected value to *c.
    43  	MOVD c+0(FP), R0 // R0 = c
    44  	storeBlock(R1,R2,R3,R4, 0(R0))
    45  	RET
    46  
    47  TEXT ·gfpAdd(SB),0,$0-24 // gfpAdd: stores (*a + *b) reduced by one conditional subtraction of ·p2 into *c. Args: pointers c at 0(FP), a at 8(FP), b at 16(FP); no local frame
    48  	MOVD a+8(FP), R0 // R0 = a
    49  	loadBlock(0(R0), R1,R2,R3,R4) // R1..R4 = words of *a, least-significant first
    50  	MOVD b+16(FP), R0 // R0 = b
    51  	loadBlock(0(R0), R5,R6,R7,R8) // R5..R8 = words of *b
    52  	loadModulus(R9,R10,R11,R12) // R9..R12 = words of ·p2, called p below (the modulus, going by the macro name)
    53  	MOVD ZR, R0 // R0 = 0; reused as a fifth word that will receive the carry out of the 256-bit addition
    54  	// Step 1: R0:R4..R1 = a + b as a five-word value. Two-operand form: ADDS x, y computes y = y + x; ADCS also adds the incoming carry.
    55  	ADDS R5, R1
    56  	ADCS R6, R2
    57  	ADCS R7, R3
    58  	ADCS R8, R4
    59  	ADCS ZR, R0 // R0 = 0 + 0 + carry, i.e. the carry out of the top word
    60  	// Step 2: R5..R8 = (a + b) - p, with the borrow propagated through the fifth word. SUBS x, y, z computes z = y - x. Afterwards the carry flag is set exactly when no borrow occurred, i.e. a + b >= p.
    61  	SUBS  R9, R1, R5
    62  	SBCS R10, R2, R6
    63  	SBCS R11, R3, R7
    64  	SBCS R12, R4, R8
    65  	SBCS  ZR, R0, R0 // only the resulting flag matters; R0 is overwritten below
    66  	// Step 3: branch-free select. Carry set -> take (a + b) - p; otherwise keep a + b. This is fully reduced if both inputs were below p (assumed - TODO confirm with callers).
    67  	CSEL CS, R5, R1, R1
    68  	CSEL CS, R6, R2, R2
    69  	CSEL CS, R7, R3, R3
    70  	CSEL CS, R8, R4, R4
    71  	// Write the selected value to *c.
    72  	MOVD c+0(FP), R0 // R0 = c
    73  	storeBlock(R1,R2,R3,R4, 0(R0))
    74  	RET
    75  
    76  TEXT ·gfpSub(SB),0,$0-24 // gfpSub: stores *a - *b into *c, adding ·p2 back once if the subtraction borrowed. Args: pointers c at 0(FP), a at 8(FP), b at 16(FP); no local frame
    77  	MOVD a+8(FP), R0 // R0 = a
    78  	loadBlock(0(R0), R1,R2,R3,R4) // R1..R4 = words of *a, least-significant first
    79  	MOVD b+16(FP), R0 // R0 = b
    80  	loadBlock(0(R0), R5,R6,R7,R8) // R5..R8 = words of *b
    81  	loadModulus(R9,R10,R11,R12) // R9..R12 = words of ·p2, called p below (the modulus, going by the macro name)
    82  	// Step 1: R1..R4 = a - b modulo 2^256. Two-operand form: SUBS x, y computes y = y - x. The carry flag ends up set exactly when there was no borrow, i.e. a >= b.
    83  	SUBS R5, R1
    84  	SBCS R6, R2
    85  	SBCS R7, R3
    86  	SBCS R8, R4
    87  	// Step 2: choose the correction term. CSEL cond, x, y, z sets z = x if cond holds, else y: no borrow -> 0, borrow -> p.
    88  	CSEL CS, ZR,  R9,  R9
    89  	CSEL CS, ZR, R10, R10
    90  	CSEL CS, ZR, R11, R11
    91  	CSEL CS, ZR, R12, R12
    92  	// Step 3: R1..R4 += correction. The carry out of this addition is not used; when p was added it cancels the earlier borrow. The result is in [0, p) if both inputs were below p (assumed - TODO confirm with callers).
    93  	ADDS  R9, R1
    94  	ADCS R10, R2
    95  	ADCS R11, R3
    96  	ADCS R12, R4
    97  	// Write the result to *c.
    98  	MOVD c+0(FP), R0 // R0 = c
    99  	storeBlock(R1,R2,R3,R4, 0(R0))
   100  	RET
   101  
   102  TEXT ·gfpMul(SB),0,$0-24 // gfpMul: multiplies *a by *b via the mul and gfpReduce macros and stores R1..R4 into *c. Args: pointers c at 0(FP), a at 8(FP), b at 16(FP); no local frame
   103  	MOVD a+8(FP), R0 // R0 = a
   104  	loadBlock(0(R0), R1,R2,R3,R4) // R1..R4 = words of *a
   105  	MOVD b+16(FP), R0 // R0 = b
   106  	loadBlock(0(R0), R5,R6,R7,R8) // R5..R8 = words of *b
   107  	// Both macros below come from mul_arm64.h, which is not visible here; their register contracts are inferred from this call site only.
   108  	mul(R9,R10,R11,R12,R13,R14,R15,R16) // presumably the 512-bit product of R1..R4 and R5..R8 lands in R9..R16 - TODO confirm in mul_arm64.h
   109  	gfpReduce() // presumably reduces that product modulo p and leaves the result in R1..R4, which is what gets stored below - TODO confirm (reduction form, e.g. Montgomery, is not visible here)
   110  	// Write R1..R4 to *c.
   111  	MOVD c+0(FP), R0 // R0 = c
   112  	storeBlock(R1,R2,R3,R4, 0(R0))
   113  	RET