github.com/cloudflare/circl@v1.5.0/math/fp25519/fp_amd64.s (about)

     1  //go:build amd64 && !purego
     2  // +build amd64,!purego
     3  
     4  #include "textflag.h"
     5  #include "fp_amd64.h"
     6  
     7  // func cmovAmd64(x, y *Elt, n uint)
        //
        // Thin wrapper: loads the three arguments from the caller's frame into
        // DI, SI and BX and expands the cselect macro on them.
        // cselect is not defined in this file (presumably it comes from
        // fp_amd64.h). Judging by its name and operand order it presumably copies
        // *y into *x when n is non-zero and leaves *x unchanged otherwise --
        // TODO confirm against the macro definition, including which extra
        // registers it clobbers.
        // Frame: no locals ($0), 24 bytes of arguments.
     8  TEXT ·cmovAmd64(SB),NOSPLIT,$0-24
     9      MOVQ x+0(FP), DI // DI = x (pointer)
    10      MOVQ y+8(FP), SI // SI = y (pointer)
    11      MOVQ n+16(FP), BX // BX = n (passed to the macro by value)
    12      cselect(0(DI),0(SI),BX)
    13      RET
    14  
    15  // func cswapAmd64(x, y *Elt, n uint)
        //
        // Thin wrapper: loads the three arguments from the caller's frame into
        // DI, SI and BX and expands the cswap macro on them.
        // cswap is not defined in this file (presumably it comes from
        // fp_amd64.h). Judging by its name it presumably exchanges *x and *y when
        // n is non-zero and leaves both unchanged otherwise -- TODO confirm
        // against the macro definition, including which extra registers it
        // clobbers.
        // Frame: no locals ($0), 24 bytes of arguments.
    16  TEXT ·cswapAmd64(SB),NOSPLIT,$0-24
    17      MOVQ x+0(FP), DI // DI = x (pointer)
    18      MOVQ y+8(FP), SI // SI = y (pointer)
    19      MOVQ n+16(FP), BX // BX = n (passed to the macro by value)
    20      cswap(0(DI),0(SI),BX)
    21      RET
    22  
    23  // func subAmd64(z, x, y *Elt)
        //
        // Thin wrapper: loads the three pointers from the caller's frame into
        // DI, SI and BX and expands the subtraction macro with z as the first
        // operand, followed by x and y.
        // subtraction is not defined in this file (presumably it comes from
        // fp_amd64.h). Judging by its name and operand order it presumably stores
        // x - y (mod 2^255-19) into z -- TODO confirm against the macro
        // definition.
        // Frame: no locals ($0), 24 bytes of arguments.
    24  TEXT ·subAmd64(SB),NOSPLIT,$0-24
    25      MOVQ z+0(FP), DI // DI = z (first macro operand)
    26      MOVQ x+8(FP), SI // SI = x
    27      MOVQ y+16(FP), BX // BX = y
    28      subtraction(0(DI),0(SI),0(BX))
    29      RET
    30  
    31  // func addsubAmd64(x, y *Elt)
        //
        // Thin wrapper: loads the two pointers from the caller's frame into DI
        // and SI and expands the addSub macro on them.
        // addSub is not defined in this file (presumably it comes from
        // fp_amd64.h). Judging by its name it presumably replaces the pair (x, y)
        // with their sum and difference in place -- TODO confirm the exact
        // output assignment against the macro definition.
        // Frame: no locals ($0), 16 bytes of arguments.
    32  TEXT ·addsubAmd64(SB),NOSPLIT,$0-16
    33      MOVQ x+0(FP), DI // DI = x (pointer)
    34      MOVQ y+8(FP), SI // SI = y (pointer)
    35      addSub(0(DI),0(SI))
    36      RET
    37  
    38  #define addLegacy \
    39      additionLeg(0(DI),0(SI),0(BX)) // addAmd64 body variant built on additionLeg; operands are the memory at DI, SI, BX
        // addBmi2Adx takes the same three operands as addLegacy but expands
        // additionAdx instead. Both are handed to CHECK_BMI2ADX by addAmd64,
        // which loads DI = z, SI = x, BX = y before expanding them.
        // NOTE(review): the "Leg"/"Adx" suffixes suggest a baseline path and a
        // BMI2+ADX path; the underlying macros are not visible here -- confirm.
    40  #define addBmi2Adx \
    41      additionAdx(0(DI),0(SI),0(BX))
    42  
    43  #define mulLegacy \
    44      integerMulLeg(0(SP),0(SI),0(BX)) \
    45      reduceFromDoubleLeg(0(DI),0(SP)) // two steps: integerMulLeg targets scratch at 0(SP), then reduceFromDoubleLeg goes from 0(SP) into 0(DI)
        // mulBmi2Adx has the same two-step shape as mulLegacy, using the *Adx
        // macros. Both variants use 0(SP) as an intermediate buffer, so they
        // are only valid inside a function that declares a local frame;
        // mulAmd64 declares 64 bytes and loads DI = z, SI = x, BX = y.
        // NOTE(review): presumably the first step writes the double-width
        // product and the second reduces it modulo 2^255-19 -- confirm against
        // the macro definitions, which are not in this file.
    46  #define mulBmi2Adx \
    47      integerMulAdx(0(SP),0(SI),0(BX)) \
    48      reduceFromDoubleAdx(0(DI),0(SP))
    49  
    50  #define sqrLegacy \
    51      integerSqrLeg(0(SP),0(SI)) \
    52      reduceFromDoubleLeg(0(DI),0(SP)) // two steps: integerSqrLeg targets scratch at 0(SP), then reduceFromDoubleLeg goes from 0(SP) into 0(DI)
        // sqrBmi2Adx has the same two-step shape as sqrLegacy, using the *Adx
        // macros. Both variants use 0(SP) as an intermediate buffer, so they
        // are only valid inside a function that declares a local frame;
        // sqrAmd64 declares 64 bytes and loads DI = z, SI = x.
        // NOTE(review): presumably the first step writes the double-width
        // square and the second reduces it modulo 2^255-19 -- confirm against
        // the macro definitions, which are not in this file.
    53  #define sqrBmi2Adx \
    54      integerSqrAdx(0(SP),0(SI)) \
    55      reduceFromDoubleAdx(0(DI),0(SP))
    56  
    57  // func addAmd64(z, x, y *Elt)
        //
        // Loads the three pointers into DI, SI and BX (the registers the
        // addLegacy / addBmi2Adx macros expect) and hands both variants to
        // CHECK_BMI2ADX together with the label name LADD.
        // CHECK_BMI2ADX is not defined in this file. NOTE(review): presumably it
        // picks one of the two bodies at run time depending on CPU support for
        // BMI2/ADX, and also emits the RET -- there is no explicit RET in this
        // function, so confirm the macro supplies one on every path.
        // Frame: no locals ($0), 24 bytes of arguments.
    58  TEXT ·addAmd64(SB),NOSPLIT,$0-24
    59      MOVQ z+0(FP), DI // DI = z (destination operand of the add macros)
    60      MOVQ x+8(FP), SI // SI = x
    61      MOVQ y+16(FP), BX // BX = y
    62      CHECK_BMI2ADX(LADD, addLegacy, addBmi2Adx)
    63  
    64  // func mulAmd64(z, x, y *Elt)
        //
        // Loads the three pointers into DI, SI and BX (the registers the
        // mulLegacy / mulBmi2Adx macros expect) and hands both variants to
        // CHECK_BMI2ADX together with the label name LMUL.
        // The 64-byte local frame is the scratch area the mul macros address
        // as 0(SP).
        // CHECK_BMI2ADX is not defined in this file. NOTE(review): presumably it
        // picks one of the two bodies at run time depending on CPU support for
        // BMI2/ADX, and also emits the RET -- there is no explicit RET in this
        // function, so confirm the macro supplies one on every path.
        // Frame: 64 bytes of locals, 24 bytes of arguments.
    65  TEXT ·mulAmd64(SB),NOSPLIT,$64-24
    66      MOVQ z+0(FP), DI // DI = z (destination operand of the reduce step)
    67      MOVQ x+8(FP), SI // SI = x
    68      MOVQ y+16(FP), BX // BX = y
    69      CHECK_BMI2ADX(LMUL, mulLegacy, mulBmi2Adx)
    70  
    71  // func sqrAmd64(z, x *Elt)
        //
        // Loads the two pointers into DI and SI (the registers the sqrLegacy /
        // sqrBmi2Adx macros expect) and hands both variants to CHECK_BMI2ADX
        // together with the label name LSQR.
        // The 64-byte local frame is the scratch area the sqr macros address
        // as 0(SP).
        // CHECK_BMI2ADX is not defined in this file. NOTE(review): presumably it
        // picks one of the two bodies at run time depending on CPU support for
        // BMI2/ADX, and also emits the RET -- there is no explicit RET in this
        // function, so confirm the macro supplies one on every path.
        // Frame: 64 bytes of locals, 16 bytes of arguments.
    72  TEXT ·sqrAmd64(SB),NOSPLIT,$64-16
    73      MOVQ z+0(FP), DI // DI = z (destination operand of the reduce step)
    74      MOVQ x+8(FP), SI // SI = x
    75      CHECK_BMI2ADX(LSQR, sqrLegacy, sqrBmi2Adx)
    76  
    77  // func modpAmd64(z *Elt)
        //
        // Reduces, in place, the 256-bit value stored at z (four 64-bit limbs,
        // least-significant limb at offset 0) to its unique representative in
        // [0, p), where p = 2^255-19. The code contains no branches; both
        // data-dependent decisions are made with conditional moves.
        //
        // Method: write z = b*2^255 + lo, where b is bit 255. Because
        // 2^255 = 19 (mod p), z is congruent to lo + 19*b. The code computes
        //     t = lo + 19*b + 19
        // and inspects bit 255 of t:
        //   - set:   lo + 19*b >= p, and clearing the bit (subtracting 2^255)
        //            leaves t - 2^255 = lo + 19*b - p, the reduced value;
        //   - clear: lo + 19*b < p, so the extra 19 is subtracted back out.
        //
        // Registers written: DI, R8-R11, AX, CX. No stack frame ($0), 8 bytes of
        // arguments.
    78  TEXT ·modpAmd64(SB),NOSPLIT,$0-8
    79      MOVQ z+0(FP), DI
    80  
            // Load the four limbs; R8 is the least significant, R11 the most.
    81      MOVQ   (DI),  R8
    82      MOVQ  8(DI),  R9
    83      MOVQ 16(DI), R10
    84      MOVQ 24(DI), R11
    85  
    86      MOVL $19, AX // AX holds the constant 19 for both conditional moves
    87      MOVL $38, CX // CX defaults to 38 = 19 + 19 (the case where bit 255 is set)
    88  
    89      BTRQ $63, R11 // PUT BIT 255 IN CARRY FLAG AND CLEAR
    90      CMOVLCC AX, CX // C[255] ? 38 : 19
    91  
    92      // ADD EITHER 19 OR 38 TO C
    93      ADDQ CX,  R8
    94      ADCQ $0,  R9
    95      ADCQ $0, R10
    96      ADCQ $0, R11 // leaves SF = bit 63 of R11, i.e. bit 255 of the sum
    97  
    98      // TEST FOR BIT 255 AGAIN; ONLY TRIGGERED ON OVERFLOW MODULO 2^255-19
            // MOVL (not XORL) is used to zero CX because it leaves the flags from
            // the ADCQ above intact for the CMOVLPL that follows.
    99      MOVL     $0,  CX
   100      CMOVLPL  AX,  CX // C[255] ? 0 : 19
   101      BTRQ    $63, R11 // CLEAR BIT 255
   102  
   103      // SUBTRACT 19 IF NECESSARY
            // The stores are interleaved with the borrow chain; MOVQ does not
            // modify the flags, so the borrow carries through SUBQ/SBBQ.
   104      SUBQ CX,  R8
   105      MOVQ  R8,   (DI)
   106      SBBQ $0,  R9
   107      MOVQ  R9,  8(DI)
   108      SBBQ $0, R10
   109      MOVQ R10, 16(DI)
   110      SBBQ $0, R11
   111      MOVQ R11, 24(DI)
   112      RET