github.com/cloudflare/circl@v1.5.0/dh/csidh/fp511_amd64.go (about)

     1  //go:build amd64 && !purego
     2  // +build amd64,!purego
     3  
     4  package csidh
     5  
     6  import (
     7  	"math/bits"
     8  
     9  	"golang.org/x/sys/cpu"
    10  )
    11  
    12  var (
    13  	// Signals support for BMI2 (MULX)
    14  	hasBMI2 = cpu.X86.HasBMI2
    15  	// Signals support for ADX and BMI2
    16  	hasADXandBMI2 = cpu.X86.HasBMI2 && cpu.X86.HasADX
    17  )
    18  
    19  var _ = hasBMI2
    20  
    21  func mul512(r, m1 *fp, m2 uint64)     { mul512Amd64(r, m1, m2) }
    22  func cswap512(x, y *fp, choice uint8) { cswap512Amd64(x, y, choice) }
    23  func mulRdc(r, x, y *fp)              { mulRdcAmd64(r, x, y) }
    24  
    // mul512Amd64 is declared without a Go body; its implementation lives
    // outside this file (presumably in the package's amd64 assembly — confirm).
    // It is the target of the mul512 wrapper. The go:noescape directive tells
    // the compiler that the pointer arguments do not escape.
    // NOTE(review): if the implementation addresses arguments by name, the
    // parameter names a, b, c must stay in sync with it.
    25  //go:noescape
    26  func mul512Amd64(a, b *fp, c uint64)
    27  
    // cswap512Amd64 is declared without a Go body and is the target of the
    // cswap512 wrapper.
    // NOTE(review): presumably swaps x and y depending on choice — confirm
    // the accepted values of choice against the implementation.
    28  //go:noescape
    29  func cswap512Amd64(x, y *fp, choice uint8)
    30  
    // mulBmiAsm is declared without a Go body. mulRdcAmd64 calls it in place
    // of mulGeneric when hasADXandBMI2 is set, and then applies a conditional
    // subtraction of p to the value written to res.
    31  //go:noescape
    32  func mulBmiAsm(res, x, y *fp)
    33  
    34  // mulRdc performs montgomery multiplication r = x * y mod P.
    35  // Returned result r is already reduced and in Montgomery domain.
    36  func mulRdcAmd64(r, x, y *fp) {
    37  	var t fp
    38  	var c uint64
    39  
    40  	if hasADXandBMI2 {
    41  		mulBmiAsm(r, x, y)
    42  	} else {
    43  		mulGeneric(r, x, y)
    44  	}
    45  
    46  	// if p <= r < 2p then r = r-p
    47  	t[0], c = bits.Sub64(r[0], p[0], 0)
    48  	t[1], c = bits.Sub64(r[1], p[1], c)
    49  	t[2], c = bits.Sub64(r[2], p[2], c)
    50  	t[3], c = bits.Sub64(r[3], p[3], c)
    51  	t[4], c = bits.Sub64(r[4], p[4], c)
    52  	t[5], c = bits.Sub64(r[5], p[5], c)
    53  	t[6], c = bits.Sub64(r[6], p[6], c)
    54  	t[7], c = bits.Sub64(r[7], p[7], c)
    55  
    56  	w := 0 - c
    57  	r[0] = ctPick64(w, r[0], t[0])
    58  	r[1] = ctPick64(w, r[1], t[1])
    59  	r[2] = ctPick64(w, r[2], t[2])
    60  	r[3] = ctPick64(w, r[3], t[3])
    61  	r[4] = ctPick64(w, r[4], t[4])
    62  	r[5] = ctPick64(w, r[5], t[5])
    63  	r[6] = ctPick64(w, r[6], t[6])
    64  	r[7] = ctPick64(w, r[7], t[7])
    65  }