github.com/Psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/crypto/Yawning/chacha20/chacha20_amd64.go (about) 1 // chacha20_amd64.go - AMD64 optimized chacha20. 2 // 3 // To the extent possible under law, Yawning Angel has waived all copyright 4 // and related or neighboring rights to chacha20, using the Creative 5 // Commons "CC0" public domain dedication. See LICENSE or 6 // <http://creativecommons.org/publicdomain/zero/1.0/> for full details. 7 8 // +build amd64,!gccgo,!appengine 9 10 package chacha20 11 12 import ( 13 "math" 14 ) 15 16 var usingAVX2 = false 17 18 func blocksAmd64SSE2(x *uint32, inp, outp *byte, nrBlocks uint) 19 20 func blocksAmd64AVX2(x *uint32, inp, outp *byte, nrBlocks uint) 21 22 func cpuidAmd64(cpuidParams *uint32) 23 24 func xgetbv0Amd64(xcrVec *uint32) 25 26 func blocksAmd64(x *[stateSize]uint32, in []byte, out []byte, nrBlocks int, isIetf bool) { 27 // Probably unneeded, but stating this explicitly simplifies the assembly. 28 if nrBlocks == 0 { 29 return 30 } 31 32 if isIetf { 33 var totalBlocks uint64 34 totalBlocks = uint64(x[12]) + uint64(nrBlocks) 35 if totalBlocks > math.MaxUint32 { 36 panic("chacha20: Exceeded keystream per nonce limit") 37 } 38 } 39 40 if in == nil { 41 for i := range out { 42 out[i] = 0 43 } 44 in = out 45 } 46 47 // Pointless to call the AVX2 code for just a single block, since half of 48 // the output gets discarded... 49 if usingAVX2 && nrBlocks > 1 { 50 blocksAmd64AVX2(&x[0], &in[0], &out[0], uint(nrBlocks)) 51 } else { 52 blocksAmd64SSE2(&x[0], &in[0], &out[0], uint(nrBlocks)) 53 } 54 } 55 56 func supportsAVX2() bool { 57 // https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family 58 const ( 59 osXsaveBit = 1 << 27 60 avx2Bit = 1 << 5 61 ) 62 63 // Check to see if CPUID actually supports the leaf that indicates AVX2. 64 // CPUID.(EAX=0H, ECX=0H) >= 7 65 regs := [4]uint32{0x00} 66 cpuidAmd64(®s[0]) 67 if regs[0] < 7 { 68 return false 69 } 70 71 // Check to see if the OS knows how to save/restore XMM/YMM state. 72 // CPUID.(EAX=01H, ECX=0H):ECX.OSXSAVE[bit 27]==1 73 regs = [4]uint32{0x01} 74 cpuidAmd64(®s[0]) 75 if regs[2]&osXsaveBit == 0 { 76 return false 77 } 78 xcrRegs := [2]uint32{} 79 xgetbv0Amd64(&xcrRegs[0]) 80 if xcrRegs[0]&6 != 6 { 81 return false 82 } 83 84 // Check for AVX2 support. 85 // CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1 86 regs = [4]uint32{0x07} 87 cpuidAmd64(®s[0]) 88 return regs[1]&avx2Bit != 0 89 } 90 91 func init() { 92 blocksFn = blocksAmd64 93 usingVectors = true 94 usingAVX2 = supportsAVX2() 95 }