// +build arm64,!generic

// Field-element helpers written in Go (Plan 9 syntax) ARM64 assembly.
//
// Every routine takes pointers on the caller's frame: c at 0(FP) is the
// destination, a at 8(FP) and (where present) b at 16(FP) are the sources.
// Each pointer addresses four consecutive 64-bit words; the carry chains
// below run from offset 0 up to offset 24, so offset 0 is the least
// significant word.
//
// Operand order reminder for the Go assembler: "OP Rm, Rn, Rd" computes
// Rd = Rn OP Rm, and "CSEL cond, Rn, Rm, Rd" computes Rd = cond ? Rn : Rm.
// After a subtraction, CS (carry set) means "no borrow occurred".

// storeBlock writes the four registers w0..w3 to the 32 bytes at dst.
#define storeBlock(w0,w1,w2,w3, dst) \
	MOVD w0,  0+dst \
	MOVD w1,  8+dst \
	MOVD w2, 16+dst \
	MOVD w3, 24+dst

// loadBlock reads the 32 bytes at src into the four registers w0..w3.
#define loadBlock(src, w0,w1,w2,w3) \
	MOVD  0+src, w0 \
	MOVD  8+src, w1 \
	MOVD 16+src, w2 \
	MOVD 24+src, w3

// loadModulus reads the four words stored at the package symbol ·p2 into
// m0..m3. The macro name indicates these words are the field modulus; the
// symbol itself is defined outside this file.
#define loadModulus(m0,m1,m2,m3) \
	MOVD ·p2+0(SB), m0 \
	MOVD ·p2+8(SB), m1 \
	MOVD ·p2+16(SB), m2 \
	MOVD ·p2+24(SB), m3

#include "mul_arm64.h"

// gfpNeg(c, a): stores (p - a), conditionally reduced once more by p, at c.
// NOTE(review): presumably a is expected to be already < p - confirm with callers.
TEXT ·gfpNeg(SB),0,$0-16
	MOVD a+8(FP), R0
	loadBlock(0(R0), R1,R2,R3,R4)
	loadModulus(R5,R6,R7,R8)

	// R1..R4 = p - a
	SUBS R1, R5, R1
	SBCS R2, R6, R2
	SBCS R3, R7, R3
	SBCS R4, R8, R4

	// R5..R8 = (p - a) - p; the final borrow tells whether p - a >= p,
	// which is the case when a is zero.
	SUBS R5, R1, R5
	SBCS R6, R2, R6
	SBCS R7, R3, R7
	SBCS R8, R4, R8

	// No borrow: keep the second difference. Borrow: keep p - a.
	CSEL CS, R5, R1, R1
	CSEL CS, R6, R2, R2
	CSEL CS, R7, R3, R3
	CSEL CS, R8, R4, R4

	MOVD c+0(FP), R0
	storeBlock(R1,R2,R3,R4, 0(R0))
	RET

// gfpAdd(c, a, b): stores a + b, with one conditional subtraction of p, at c.
TEXT ·gfpAdd(SB),0,$0-24
	MOVD a+8(FP), R0
	loadBlock(0(R0), R1,R2,R3,R4)
	MOVD b+16(FP), R0
	loadBlock(0(R0), R5,R6,R7,R8)
	loadModulus(R9,R10,R11,R12)
	// R0 becomes a fifth word that receives the carry out of the addition.
	MOVD ZR, R0

	// R0:R1..R4 = a + b
	ADDS R5, R1
	ADCS R6, R2
	ADCS R7, R3
	ADCS R8, R4
	ADCS ZR, R0

	// R5..R8 = (a + b) - p, with the borrow propagated through the carry
	// word so the final flag reflects the full five-word comparison.
	SUBS  R9, R1, R5
	SBCS R10, R2, R6
	SBCS R11, R3, R7
	SBCS R12, R4, R8
	SBCS  ZR, R0, R0

	// No borrow means a + b >= p: take the reduced value.
	CSEL CS, R5, R1, R1
	CSEL CS, R6, R2, R2
	CSEL CS, R7, R3, R3
	CSEL CS, R8, R4, R4

	MOVD c+0(FP), R0
	storeBlock(R1,R2,R3,R4, 0(R0))
	RET

// gfpSub(c, a, b): stores a - b at c, adding p back when the subtraction
// borrowed.
TEXT ·gfpSub(SB),0,$0-24
	MOVD a+8(FP), R0
	loadBlock(0(R0), R1,R2,R3,R4)
	MOVD b+16(FP), R0
	loadBlock(0(R0), R5,R6,R7,R8)
	loadModulus(R9,R10,R11,R12)

	// R1..R4 = a - b
	SUBS R5, R1
	SBCS R6, R2
	SBCS R7, R3
	SBCS R8, R4

	// No borrow: replace the addend with zero. Borrow: leave p in R9..R12.
	// The selects are placed before the next flag-setting instruction.
	CSEL CS, ZR,  R9,  R9
	CSEL CS, ZR, R10, R10
	CSEL CS, ZR, R11, R11
	CSEL CS, ZR, R12, R12

	// Add back either zero or p.
	ADDS  R9, R1
	ADCS R10, R2
	ADCS R11, R3
	ADCS R12, R4

	MOVD c+0(FP), R0
	storeBlock(R1,R2,R3,R4, 0(R0))
	RET

// gfpMul(c, a, b): loads a into R1..R4 and b into R5..R8, runs the mul and
// gfpReduce macros from mul_arm64.h, then stores R1..R4 at c.
// NOTE(review): presumably mul leaves the wide product in R9..R16 and
// gfpReduce leaves the reduced result in R1..R4 - confirm against
// mul_arm64.h, which also determines which further registers are clobbered.
TEXT ·gfpMul(SB),0,$0-24
	MOVD a+8(FP), R0
	loadBlock(0(R0), R1,R2,R3,R4)
	MOVD b+16(FP), R0
	loadBlock(0(R0), R5,R6,R7,R8)

	mul(R9,R10,R11,R12,R13,R14,R15,R16)
	gfpReduce()

	MOVD c+0(FP), R0
	storeBlock(R1,R2,R3,R4, 0(R0))
	RET