github.com/mad-day/Yawning-crypto@v0.0.0-20190711051033-5a5f8cca32ec/aez/aez_amd64.go (about)

     1  // aez_amd64.go - AMD64 specific routines.
     2  //
     3  // To the extent possible under law, Yawning Angel has waived all copyright
     4  // and related or neighboring rights to aez, using the Creative
     5  // Commons "CC0" public domain dedication. See LICENSE or
     6  // <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
     7  
     8  // +build amd64,!gccgo,!appengine,!noasm
     9  
    10  package aez
    11  
    12  var useAESNI = false
    13  
// cpuidAMD64 is declared without a Go body; its implementation lives outside
// this file.  The only caller here, supportsAESNI, passes a pointer to the
// first of four uint32 values with the CPUID leaf stored in element 0 and
// reads the ECX feature bits back from element 2.
// NOTE(review): presumably the four slots are EAX, EBX, ECX, EDX in that
// order and serve as both input and output - confirm against the assembly.
//
//go:noescape
func cpuidAMD64(cpuidParams *uint32)
    16  
// resetAMD64SSE2 is declared without a Go body; its implementation lives
// outside this file.  roundAESNI.Reset calls it right after wiping the stored
// round keys.
// NOTE(review): presumably this clears SSE registers that may still hold key
// material - confirm against the assembly.
//
//go:noescape
func resetAMD64SSE2()
    19  
// xorBytes1x16AMD64SSE2 is declared without a Go body; its implementation
// lives outside this file.  xorBytes1x16 calls it with pointers to the first
// byte of its a, b and dst slices.
// NOTE(review): the name suggests dst = a ^ b over a single 16 byte block,
// with no length information passed in - confirm against the assembly.
//
//go:noescape
func xorBytes1x16AMD64SSE2(a, b, dst *byte)
    22  
// xorBytes4x16AMD64SSE2 is declared without a Go body; its implementation
// lives outside this file.  xorBytes4x16 calls it with pointers to the first
// byte of its a, b, c, d and dst slices.
// NOTE(review): the name suggests dst = a ^ b ^ c ^ d over a single 16 byte
// block, with no length information passed in - confirm against the assembly.
//
//go:noescape
func xorBytes4x16AMD64SSE2(a, b, c, d, dst *byte)
    25  
// aezAES4AMD64AESNI is declared without a Go body; its implementation lives
// outside this file.  roundAESNI.AES4 calls it with pointers to the first
// byte of the j, i and l blocks, of the stored round keys (k), of the source
// slice and of the destination block, in that order.
// NOTE(review): presumably the AES-NI version of the AEZ "AES4" primitive -
// confirm the exact computation against the assembly.
//
//go:noescape
func aezAES4AMD64AESNI(j, i, l, k, src, dst *byte)
    28  
// aezAES10AMD64AESNI is declared without a Go body; its implementation lives
// outside this file.  roundAESNI.AES10 calls it with pointers to the first
// byte of the l block, of the stored round keys (k), of the source slice and
// of the destination block, in that order.
// NOTE(review): presumably the AES-NI version of the AEZ "AES10" primitive -
// confirm the exact computation against the assembly.
//
//go:noescape
func aezAES10AMD64AESNI(l, k, src, dst *byte)
    31  
// aezCorePass1AMD64AESNI is declared without a Go body; its implementation
// lives outside this file.  eState.aezCorePass1 calls it with, in order:
// the first byte of the input, the first byte of the output, the first byte
// of X, &e.I[1][0], &e.L[0][0], the first byte of the round keys, the first
// byte of dblConsts, and the caller supplied sz.
// NOTE(review): sz is presumably a byte count to process - confirm against
// the assembly.
//
//go:noescape
func aezCorePass1AMD64AESNI(src, dst, x, i, l, k, consts *byte, sz int)
    34  
// aezCorePass2AMD64AESNI is declared without a Go body; its implementation
// lives outside this file.  eState.aezCorePass2 calls it with, in order:
// the first byte of the output, the first byte of Y, the first byte of S,
// &e.J[0][0], &e.I[1][0], &e.L[0][0], the first byte of the round keys, the
// first byte of dblConsts, and the caller supplied sz.  Unlike pass 1 there
// is no separate source pointer in the signature.
// NOTE(review): presumably it works in place on dst, and sz is presumably a
// byte count - confirm against the assembly.
//
//go:noescape
func aezCorePass2AMD64AESNI(dst, y, s, j, i, l, k, consts *byte, sz int)
    37  
    38  func xorBytes1x16(a, b, dst []byte) {
    39  	xorBytes1x16AMD64SSE2(&a[0], &b[0], &dst[0])
    40  }
    41  
    42  func xorBytes4x16(a, b, c, d, dst []byte) {
    43  	xorBytes4x16AMD64SSE2(&a[0], &b[0], &c[0], &d[0], &dst[0])
    44  }
    45  
// roundAESNI is the AES-NI backed round implementation.  newRoundAESNI
// returns it as an aesImpl, and the core pass methods in this file type
// assert e.aes back to *roundAESNI to reach the key material.
type roundAESNI struct {
	// keys is a private copy of the extracted key, filled in by
	// newRoundAESNI and wiped by Reset.
	keys [extractedKeySize]byte
}
    49  
    50  func (r *roundAESNI) Reset() {
    51  	memwipe(r.keys[:])
    52  	resetAMD64SSE2()
    53  }
    54  
    55  func (r *roundAESNI) AES4(j, i, l *[blockSize]byte, src []byte, dst *[blockSize]byte) {
    56  	aezAES4AMD64AESNI(&j[0], &i[0], &l[0], &r.keys[0], &src[0], &dst[0])
    57  }
    58  
    59  func (r *roundAESNI) AES10(l *[blockSize]byte, src []byte, dst *[blockSize]byte) {
    60  	aezAES10AMD64AESNI(&l[0], &r.keys[0], &src[0], &dst[0])
    61  }
    62  
    63  func newRoundAESNI(extractedKey *[extractedKeySize]byte) aesImpl {
    64  	r := new(roundAESNI)
    65  	copy(r.keys[:], extractedKey[:])
    66  
    67  	return r
    68  }
    69  
// dblConsts is the 32 byte constant table whose address is handed to the
// AES-NI core pass routines as their consts argument.  It is a variable
// rather than a constant because its address is taken.
// NOTE(review): the name suggests the assembly uses it for the block
// "doubling" step - confirm against the assembly.
var dblConsts = [32]byte{
	// PSHUFB constant: the byte indices 15 down to 0, i.e. a shuffle mask
	// that reverses the byte order of a 16 byte value.
	0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
	0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,

	// Mask constant: read as four little endian 32 bit words this is
	// 1, 1, 1, 0x87.
	// NOTE(review): 0x87 is presumably the GF(2^128) reduction constant.
	0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
	0x01, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00,
}
    79  
    80  func (e *eState) aezCorePass1(in, out []byte, X *[blockSize]byte, sz int) {
    81  	// Call the "slow" implementation if hardware/OS doesn't allow AES-NI.
    82  	if !useAESNI {
    83  		e.aezCorePass1Slow(in, out, X, sz)
    84  		return
    85  	}
    86  
    87  	// Call the AES-NI implementation.
    88  	a := e.aes.(*roundAESNI)
    89  	aezCorePass1AMD64AESNI(&in[0], &out[0], &X[0], &e.I[1][0], &e.L[0][0], &a.keys[0], &dblConsts[0], sz)
    90  }
    91  
    92  func (e *eState) aezCorePass2(in, out []byte, Y, S *[blockSize]byte, sz int) {
    93  	// Call the "slow" implementation if hardware/OS doesn't allow AES-NI.
    94  	if !useAESNI {
    95  		e.aezCorePass2Slow(in, out, Y, S, sz)
    96  		return
    97  	}
    98  
    99  	// Call the AES-NI implementation.
   100  	a := e.aes.(*roundAESNI)
   101  	aezCorePass2AMD64AESNI(&out[0], &Y[0], &S[0], &e.J[0][0], &e.I[1][0], &e.L[0][0], &a.keys[0], &dblConsts[0], sz)
   102  }
   103  
   104  func supportsAESNI() bool {
   105  	const aesniBit = 1 << 25
   106  
   107  	// Check for AES-NI support.
   108  	// CPUID.(EAX=01H, ECX=0H):ECX.AESNI[bit 25]==1
   109  	regs := [4]uint32{0x01}
   110  	cpuidAMD64(&regs[0])
   111  
   112  	return regs[2]&aesniBit != 0
   113  }
   114  
   115  func platformInit() {
   116  	useAESNI = supportsAESNI()
   117  	if useAESNI {
   118  		newAes = newRoundAESNI
   119  		isHardwareAccelerated = true
   120  	}
   121  }