//go:build amd64 && !purego
// +build amd64,!purego

#include "textflag.h"
#include "fp_amd64.h"

// Conventions shared by the routines in this file:
//   - Every argument is an 8-byte word read from the caller's frame through
//     the FP pseudo-register; none of the routines returns a value.
//   - The arithmetic is delegated to macros that are not defined in this file
//     (presumably fp_amd64.h -- check there for the registers they clobber
//     before changing which registers hold the pointers below).

// func cmovAmd64(x, y *Elt, n uint)
//
// Loads x into DI, y into SI and n into BX, then expands cselect on the
// memory at 0(DI) and 0(SI) with BX as the selector.
// NOTE(review): by its name this should copy y into x when n != 0 without
// branching -- confirm against the cselect macro.
TEXT ·cmovAmd64(SB),NOSPLIT,$0-24
    MOVQ x+0(FP), DI
    MOVQ y+8(FP), SI
    MOVQ n+16(FP), BX
    cselect(0(DI),0(SI),BX)
    RET

// func cswapAmd64(x, y *Elt, n uint)
//
// Loads x into DI, y into SI and n into BX, then expands cswap on the memory
// at 0(DI) and 0(SI) with BX as the selector.
// NOTE(review): by its name this should exchange x and y when n != 0 without
// branching -- confirm against the cswap macro.
TEXT ·cswapAmd64(SB),NOSPLIT,$0-24
    MOVQ x+0(FP), DI
    MOVQ y+8(FP), SI
    MOVQ n+16(FP), BX
    cswap(0(DI),0(SI),BX)
    RET

// func subAmd64(z, x, y *Elt)
//
// Loads z into DI, x into SI and y into BX, then expands subtraction with
// 0(DI) as the first operand and 0(SI), 0(BX) as the second and third.
// NOTE(review): presumably z = x - y in the field -- confirm against the
// subtraction macro.
TEXT ·subAmd64(SB),NOSPLIT,$0-24
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    MOVQ y+16(FP), BX
    subtraction(0(DI),0(SI),0(BX))
    RET

// func addsubAmd64(x, y *Elt)
//
// Loads x into DI and y into SI, then expands addSub on 0(DI) and 0(SI).
// NOTE(review): presumably computes the sum and the difference of x and y in
// place -- confirm which operand receives which result in the addSub macro.
TEXT ·addsubAmd64(SB),NOSPLIT,$0-16
    MOVQ x+0(FP), DI
    MOVQ y+8(FP), SI
    addSub(0(DI),0(SI))
    RET

// Bodies handed to CHECK_BMI2ADX by addAmd64, mulAmd64 and sqrAmd64 below.
// Each operation has a "Legacy" and a "Bmi2Adx" variant built from the
// corresponding *Leg / *Adx macros. All of them expect the register setup done
// by the calling TEXT block: DI = z, SI = x and (for add/mul) BX = y.
// Comments are kept outside the macro bodies so the line continuations stay
// intact.

// Addition: operands 0(DI), 0(SI), 0(BX).
#define addLegacy \
    additionLeg(0(DI),0(SI),0(BX))
#define addBmi2Adx \
    additionAdx(0(DI),0(SI),0(BX))

// Multiplication: integerMul* is given 0(SP) plus the two inputs, then
// reduceFromDouble* is given 0(DI) and 0(SP). 0(SP) is the local frame that
// the caller declares as 64 bytes.
// NOTE(review): presumably 0(SP) holds the unreduced double-width product.
#define mulLegacy \
    integerMulLeg(0(SP),0(SI),0(BX)) \
    reduceFromDoubleLeg(0(DI),0(SP))
#define mulBmi2Adx \
    integerMulAdx(0(SP),0(SI),0(BX)) \
    reduceFromDoubleAdx(0(DI),0(SP))

// Squaring: same two-step shape as multiplication, with a single input 0(SI).
#define sqrLegacy \
    integerSqrLeg(0(SP),0(SI)) \
    reduceFromDoubleLeg(0(DI),0(SP))
#define sqrBmi2Adx \
    integerSqrAdx(0(SP),0(SI)) \
    reduceFromDoubleAdx(0(DI),0(SP))

// func addAmd64(z, x, y *Elt)
//
// Loads z into DI, x into SI and y into BX, then expands CHECK_BMI2ADX with
// the label LADD and the two addition bodies defined above.
// NOTE(review): there is no explicit RET in this block; the return is expected
// to come from the CHECK_BMI2ADX expansion, which presumably also picks
// between the legacy and BMI2/ADX bodies at run time -- confirm in
// fp_amd64.h.
TEXT ·addAmd64(SB),NOSPLIT,$0-24
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    MOVQ y+16(FP), BX
    CHECK_BMI2ADX(LADD, addLegacy, addBmi2Adx)

// func mulAmd64(z, x, y *Elt)
//
// Loads z into DI, x into SI and y into BX, then expands CHECK_BMI2ADX with
// the label LMUL and the two multiplication bodies defined above. The 64-byte
// local frame is the 0(SP) scratch area those bodies reference.
// NOTE(review): no explicit RET here either; see the note on addAmd64.
TEXT ·mulAmd64(SB),NOSPLIT,$64-24
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    MOVQ y+16(FP), BX
    CHECK_BMI2ADX(LMUL, mulLegacy, mulBmi2Adx)

// func sqrAmd64(z, x *Elt)
//
// Loads z into DI and x into SI, then expands CHECK_BMI2ADX with the label
// LSQR and the two squaring bodies defined above. The 64-byte local frame is
// the 0(SP) scratch area those bodies reference.
// NOTE(review): no explicit RET here either; see the note on addAmd64.
TEXT ·sqrAmd64(SB),NOSPLIT,$64-16
    MOVQ z+0(FP), DI
    MOVQ x+8(FP), SI
    CHECK_BMI2ADX(LSQR, sqrLegacy, sqrBmi2Adx)

// func modpAmd64(z *Elt)
//
// Reduces, in place, the 256-bit little-endian value stored as four 64-bit
// words at z (offsets 0, 8, 16, 24) to its representative modulo 2^255-19.
// The sequence is branch-free: both decisions are made with CMOV.
//
// Writing the input as b*2^255 + l with b the top bit, the code forms
// t = l + 19*b + 19. If t reaches 2^255 then l + 19*b >= 2^255-19 and the
// answer is t with bit 255 dropped; otherwise the extra 19 is taken back out.
//
// The instruction order matters: each CMOV consumes flags produced by the
// instruction sequence immediately before it.
TEXT ·modpAmd64(SB),NOSPLIT,$0-8
    MOVQ z+0(FP), DI

    // Load the four limbs, least significant first.
    MOVQ   (DI),  R8
    MOVQ  8(DI),  R9
    MOVQ 16(DI), R10
    MOVQ 24(DI), R11

    MOVL $19, AX
    MOVL $38, CX

    BTRQ $63, R11 // PUT BIT 255 IN CARRY FLAG AND CLEAR
    CMOVLCC AX, CX // C[255] ? 38 : 19

    // ADD EITHER 19 OR 38 TO C
    ADDQ CX, R8
    ADCQ $0, R9
    ADCQ $0, R10
    ADCQ $0, R11

    // TEST FOR BIT 255 AGAIN; ONLY TRIGGERED ON OVERFLOW MODULO 2^255-19
    // The sign flag left by the last ADCQ is bit 63 of R11, i.e. bit 255 of
    // the sum. MOVL does not modify flags, so CMOVLPL still sees that value.
    MOVL     $0,  CX
    CMOVLPL  AX,  CX // C[255] ? 0 : 19
    BTRQ    $63, R11 // CLEAR BIT 255

    // SUBTRACT 19 IF NECESSARY
    // Stores are interleaved with the borrow chain; MOVQ leaves the borrow
    // flag untouched between SUBQ and the following SBBQs.
    SUBQ CX, R8
    MOVQ R8, (DI)
    SBBQ $0, R9
    MOVQ R9, 8(DI)
    SBBQ $0, R10
    MOVQ R10, 16(DI)
    SBBQ $0, R11
    MOVQ R11, 24(DI)
    RET