github.com/Psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/crypto/Yawning/chacha20/chacha20_amd64.go (about)

     1  // chacha20_amd64.go - AMD64 optimized chacha20.
     2  //
     3  // To the extent possible under law, Yawning Angel has waived all copyright
     4  // and related or neighboring rights to chacha20, using the Creative
     5  // Commons "CC0" public domain dedication. See LICENSE or
     6  // <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
     7  
     8  // +build amd64,!gccgo,!appengine
     9  
    10  package chacha20
    11  
    12  import (
    13  	"math"
    14  )
    15  
    16  var usingAVX2 = false
    17  
    18  func blocksAmd64SSE2(x *uint32, inp, outp *byte, nrBlocks uint)
    19  
    20  func blocksAmd64AVX2(x *uint32, inp, outp *byte, nrBlocks uint)
    21  
    22  func cpuidAmd64(cpuidParams *uint32)
    23  
    24  func xgetbv0Amd64(xcrVec *uint32)
    25  
    26  func blocksAmd64(x *[stateSize]uint32, in []byte, out []byte, nrBlocks int, isIetf bool) {
    27  	// Probably unneeded, but stating this explicitly simplifies the assembly.
    28  	if nrBlocks == 0 {
    29  		return
    30  	}
    31  
    32  	if isIetf {
    33  		var totalBlocks uint64
    34  		totalBlocks = uint64(x[12]) + uint64(nrBlocks)
    35  		if totalBlocks > math.MaxUint32 {
    36  			panic("chacha20: Exceeded keystream per nonce limit")
    37  		}
    38  	}
    39  
    40  	if in == nil {
    41  		for i := range out {
    42  			out[i] = 0
    43  		}
    44  		in = out
    45  	}
    46  
    47  	// Pointless to call the AVX2 code for just a single block, since half of
    48  	// the output gets discarded...
    49  	if usingAVX2 && nrBlocks > 1 {
    50  		blocksAmd64AVX2(&x[0], &in[0], &out[0], uint(nrBlocks))
    51  	} else {
    52  		blocksAmd64SSE2(&x[0], &in[0], &out[0], uint(nrBlocks))
    53  	}
    54  }
    55  
    56  func supportsAVX2() bool {
    57  	// https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
    58  	const (
    59  		osXsaveBit = 1 << 27
    60  		avx2Bit    = 1 << 5
    61  	)
    62  
    63  	// Check to see if CPUID actually supports the leaf that indicates AVX2.
    64  	// CPUID.(EAX=0H, ECX=0H) >= 7
    65  	regs := [4]uint32{0x00}
    66  	cpuidAmd64(&regs[0])
    67  	if regs[0] < 7 {
    68  		return false
    69  	}
    70  
    71  	// Check to see if the OS knows how to save/restore XMM/YMM state.
    72  	// CPUID.(EAX=01H, ECX=0H):ECX.OSXSAVE[bit 27]==1
    73  	regs = [4]uint32{0x01}
    74  	cpuidAmd64(&regs[0])
    75  	if regs[2]&osXsaveBit == 0 {
    76  		return false
    77  	}
    78  	xcrRegs := [2]uint32{}
    79  	xgetbv0Amd64(&xcrRegs[0])
    80  	if xcrRegs[0]&6 != 6 {
    81  		return false
    82  	}
    83  
    84  	// Check for AVX2 support.
    85  	// CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1
    86  	regs = [4]uint32{0x07}
    87  	cpuidAmd64(&regs[0])
    88  	return regs[1]&avx2Bit != 0
    89  }
    90  
    91  func init() {
    92  	blocksFn = blocksAmd64
    93  	usingVectors = true
    94  	usingAVX2 = supportsAVX2()
    95  }