// +build amd64,!generic

// Arithmetic helpers for 256-bit values held as four 64-bit words, with the
// least significant word at offset 0 (carries propagate from offset 0 up to
// offset 24 in every routine below).
//
// The symbol ·p2 is not defined in this file; every routine reads four
// 64-bit words from it. The comments inside gfpCarry call it "p", so it is
// presumably the field modulus -- confirm against the Go side of the package.
//
// All routines receive pointers through the Go stack frame:
//   c+0(FP)  destination
//   a+8(FP)  first operand
//   b+16(FP) second operand (gfpAdd, gfpSub, gfpMul only)

// storeBlock writes the four registers a0..a3 to the memory operand r at
// byte offsets 0, 8, 16 and 24.
#define storeBlock(a0,a1,a2,a3, r) \
	MOVQ a0, 0+r \
	MOVQ a1, 8+r \
	MOVQ a2, 16+r \
	MOVQ a3, 24+r

// loadBlock reads four 64-bit words from the memory operand r (byte offsets
// 0, 8, 16 and 24) into the registers a0..a3.
#define loadBlock(r, a0,a1,a2,a3) \
	MOVQ 0+r, a0 \
	MOVQ 8+r, a1 \
	MOVQ 16+r, a2 \
	MOVQ 24+r, a3

// gfpCarry performs one conditional subtraction of ·p2 from the five-word
// value a4:a3:a2:a1:a0.
//
//   In:      a0..a4  value to reduce (a4 is the most significant word)
//   Out:     a0..a3  a - p2 if that subtraction did not borrow, else unchanged
//   Clobber: b0..b4, flags
//
// a4 is only read, never written back. The selection uses CMOVQCC, so no
// branch is taken on the comparison result.
// NOTE(review): a single subtraction only yields a fully reduced result when
// the input is below 2*p2 -- confirm that callers guarantee this.
#define gfpCarry(a0,a1,a2,a3,a4, b0,b1,b2,b3,b4) \
	\ // b = a-p
	MOVQ a0, b0 \
	MOVQ a1, b1 \
	MOVQ a2, b2 \
	MOVQ a3, b3 \
	MOVQ a4, b4 \
	\
	SUBQ ·p2+0(SB), b0 \
	SBBQ ·p2+8(SB), b1 \
	SBBQ ·p2+16(SB), b2 \
	SBBQ ·p2+24(SB), b3 \
	SBBQ $0, b4 \
	\
	\ // if b is negative then return a
	\ // else return b
	CMOVQCC b0, a0 \
	CMOVQCC b1, a1 \
	CMOVQCC b2, a2 \
	CMOVQCC b3, a3

// These headers are not visible here. The macros mul, mulBMI2, gfpReduce and
// gfpReduceBMI2 used by gfpMul are not defined in this file, so they
// presumably come from these includes.
#include "mul_amd64.h"
#include "mul_bmi2_amd64.h"

// gfpNeg stores p2 - a into c, with a result equal to p2 folded to zero.
// Presumably declared on the Go side as func gfpNeg(c, a *gfP) -- TODO confirm.
//
// Frame: 0 bytes of locals, 16 bytes of arguments (two pointers).
// Uses:  R8-R15, AX, BX, DI, flags.
// NOTE(review): the borrow out of the p2 - a subtraction is discarded (AX is
// set to 0 unconditionally), so this looks like it expects a <= p2 -- confirm.
TEXT ·gfpNeg(SB),0,$0-16
	// R11:R10:R9:R8 = p2
	MOVQ ·p2+0(SB), R8
	MOVQ ·p2+8(SB), R9
	MOVQ ·p2+16(SB), R10
	MOVQ ·p2+24(SB), R11

	// R11:R10:R9:R8 -= *a
	MOVQ a+8(FP), DI
	SUBQ 0(DI), R8
	SBBQ 8(DI), R9
	SBBQ 16(DI), R10
	SBBQ 24(DI), R11

	// The fifth (high) word is zero. When a == 0 the difference equals p2 and
	// gfpCarry turns it into 0; otherwise gfpCarry's subtraction borrows and
	// the difference is kept as is.
	MOVQ $0, AX
	gfpCarry(R8,R9,R10,R11,AX, R12,R13,R14,R15,BX)

	MOVQ c+0(FP), DI
	storeBlock(R8,R9,R10,R11, 0(DI))
	RET

// gfpAdd stores a + b into c, subtracting p2 once if the sum is >= p2.
// Presumably declared on the Go side as func gfpAdd(c, a, b *gfP) -- TODO confirm.
//
// Frame: 0 bytes of locals, 24 bytes of arguments (three pointers).
// Uses:  R8-R15, AX, BX, DI, SI, flags.
TEXT ·gfpAdd(SB),0,$0-24
	MOVQ a+8(FP), DI
	MOVQ b+16(FP), SI

	loadBlock(0(DI), R8,R9,R10,R11)
	// R12 collects the carry out of the 256-bit addition as a fifth word.
	MOVQ $0, R12

	ADDQ 0(SI), R8
	ADCQ 8(SI), R9
	ADCQ 16(SI), R10
	ADCQ 24(SI), R11
	ADCQ $0, R12

	// Conditionally subtract p2 from the five-word sum R12:R11:R10:R9:R8.
	gfpCarry(R8,R9,R10,R11,R12, R13,R14,R15,AX,BX)

	MOVQ c+0(FP), DI
	storeBlock(R8,R9,R10,R11, 0(DI))
	RET

// gfpSub stores a - b into c, adding p2 back when the subtraction borrows.
// Presumably declared on the Go side as func gfpSub(c, a, b *gfP) -- TODO confirm.
//
// Frame: 0 bytes of locals, 24 bytes of arguments (three pointers).
// Uses:  R8-R15, AX, DI, SI, flags.
TEXT ·gfpSub(SB),0,$0-24
	MOVQ a+8(FP), DI
	MOVQ b+16(FP), SI

	loadBlock(0(DI), R8,R9,R10,R11)

	// Prepare the correction term (p2) and a zero register before the
	// subtraction, so nothing between the final SBBQ and the CMOVQCCs can
	// disturb the carry flag.
	MOVQ ·p2+0(SB), R12
	MOVQ ·p2+8(SB), R13
	MOVQ ·p2+16(SB), R14
	MOVQ ·p2+24(SB), R15
	MOVQ $0, AX

	// R11:R10:R9:R8 = a - b; CF is set when the subtraction borrows.
	SUBQ 0(SI), R8
	SBBQ 8(SI), R9
	SBBQ 16(SI), R10
	SBBQ 24(SI), R11

	// No borrow (CF clear): replace the correction term with zero.
	CMOVQCC AX, R12
	CMOVQCC AX, R13
	CMOVQCC AX, R14
	CMOVQCC AX, R15

	// Add either p2 or 0; the carry out of this addition is discarded.
	ADDQ R12, R8
	ADCQ R13, R9
	ADCQ R14, R10
	ADCQ R15, R11

	MOVQ c+0(FP), DI
	storeBlock(R8,R9,R10,R11, 0(DI))
	RET

// gfpMul multiplies a by b via the macros from the included headers and
// stores the four-word result, taken from R12-R15, into c.
// Presumably declared on the Go side as func gfpMul(c, a, b *gfP) -- TODO confirm.
//
// Frame: 160 bytes of locals, 24 bytes of arguments (three pointers).
// The path is selected at run time from the byte ·hasBMI2.
// NOTE(review): register and stack usage of mul, mulBMI2, gfpReduce and
// gfpReduceBMI2 is defined outside this file; the only contract visible here
// is that both paths leave the result in R12-R15. Confirm in the headers.
TEXT ·gfpMul(SB),0,$160-24
	MOVQ a+8(FP), DI
	MOVQ b+16(FP), SI

	// Jump to a slightly different implementation if MULX isn't supported.
	CMPB ·hasBMI2(SB), $0
	JE nobmi2Mul

	// BMI2 path: the eight registers R8-R15 are spilled to 0(SP)..63(SP)
	// before the reduction (presumably the 512-bit product -- confirm).
	mulBMI2(0(DI),8(DI),16(DI),24(DI), 0(SI))
	storeBlock( R8, R9,R10,R11, 0(SP))
	storeBlock(R12,R13,R14,R15, 32(SP))
	gfpReduceBMI2()
	JMP end

nobmi2Mul:
	// Generic path: mul is handed 0(SP) directly, and gfpReduce then works
	// from that same location.
	mul(0(DI),8(DI),16(DI),24(DI), 0(SI), 0(SP))
	gfpReduce(0(SP))

end:
	// Both paths join here with the result in R12-R15.
	MOVQ c+0(FP), DI
	storeBlock(R12,R13,R14,R15, 0(DI))
	RET