github.com/mad-day/Yawning-crypto@v0.0.0-20190711051033-5a5f8cca32ec/aez/aez.go (about)

     1  // aez.go - An AEZ implementation.
     2  //
     3  // To the extent possible under law, Yawning Angel has waived all copyright
     4  // and related or neighboring rights to aez, using the Creative
     5  // Commons "CC0" public domain dedication. See LICENSE or
     6  // <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
     7  //
     8  // This implementation is primarily derived from the AEZ v5 reference code
     9  // available at: http://www.cs.ucdavis.edu/~rogaway/aez
    10  //
// It started off as a straightforward port of the `ref` variant, but has
    12  // pulled in ideas from `aesni`.
    13  
    14  // Package aez implements the AEZ AEAD primitive.
    15  //
    16  // See: http://web.cs.ucdavis.edu/~rogaway/aez/
    17  package aez
    18  
    19  import (
    20  	"crypto/subtle"
    21  	"encoding/binary"
    22  	"math"
    23  
    24  	"golang.org/x/crypto/blake2b"
    25  )
    26  
    27  const (
    28  	// Version is the version of the AEZ specification implemented.
    29  	Version = "v5"
    30  
    31  	extractedKeySize = 3 * 16
    32  	blockSize        = 16
    33  )
    34  
    35  var (
    36  	newAes                aesImplCtor = nil
    37  	zero                              = [blockSize]byte{}
    38  	isHardwareAccelerated             = false
    39  )
    40  
    41  func extract(k []byte, extractedKey *[extractedKeySize]byte) {
    42  	if len(k) == extractedKeySize {
    43  		copy(extractedKey[:], k)
    44  	} else {
    45  		h, err := blake2b.New(extractedKeySize, nil)
    46  		if err != nil {
    47  			panic("aez: Extract: " + err.Error())
    48  		}
    49  		defer h.Reset()
    50  		h.Write(k)
    51  		tmp := h.Sum(nil)
    52  		copy(extractedKey[:], tmp)
    53  		memwipe(tmp)
    54  	}
    55  }
    56  
// aesImpl is the interface to the AES based primitives that eState uses.
// The concrete implementation is produced by an aesImplCtor.
type aesImpl interface {
	// Reset is called from eState.reset when the state is torn down.
	// Presumably it clears the implementation's copy of the key material;
	// confirm against the implementations.
	Reset()

	// AES4 processes the block at the start of src under the tweak blocks
	// j, i and l, and writes the result to dst.  Callers in this file pass
	// the same block as both src and dst, so that must be supported.
	// NOTE(review): going by the name this is the 4 round variant from the
	// AEZ specification -- confirm.
	AES4(j, i, l *[blockSize]byte, src []byte, dst *[blockSize]byte)

	// AES10 processes the block at the start of src under the tweak block
	// l, and writes the result to dst.  As with AES4, src and dst may be
	// the same block.
	// NOTE(review): going by the name this is the 10 round variant from
	// the AEZ specification -- confirm.
	AES10(l *[blockSize]byte, src []byte, dst *[blockSize]byte)
}
    63  
// aesImplCtor constructs an aesImpl from the extracted key material.  The
// package-level newAes variable holds the constructor in use.
type aesImplCtor func(*[extractedKeySize]byte) aesImpl
    65  
// eState holds the per-key state: the I, J and L subkey tables derived by
// init, and the AES implementation keyed with the same extracted key.
//
// The tables hold small multiples of the base subkeys so that the tweaks
// can be looked up rather than recomputed.  reset wipes all of it.
type eState struct {
	I   [2][16]byte // 1I, 2I
	J   [3][16]byte // 1J, 2J, 4J
	L   [8][16]byte // 0L, 1L ... 7L
	aes aesImpl     // created by newAes in init
}
    72  
    73  func (e *eState) init(k []byte) {
    74  	var extractedKey [extractedKeySize]byte
    75  	defer memwipe(extractedKey[:])
    76  
    77  	extract(k, &extractedKey)
    78  
    79  	copy(e.I[0][:], extractedKey[0:16]) // 1I
    80  	multBlock(2, &e.I[0], &e.I[1])      // 2I
    81  
    82  	copy(e.J[0][:], extractedKey[16:32]) // 1J
    83  	multBlock(2, &e.J[0], &e.J[1])       // 2J
    84  	multBlock(2, &e.J[1], &e.J[2])       // 4J
    85  
    86  	// The upstream `aesni` code only stores L1, L2, and L4, but it has
    87  	// the benefit of being written in a real language that has vector
    88  	// intrinsics.
    89  
    90  	// multBlock(0, &e.L, &e.L[0])                // L0 (all `0x00`s)
    91  	copy(e.L[1][:], extractedKey[32:48])          // L1
    92  	multBlock(2, &e.L[1], &e.L[2])                // L2 = L1*2
    93  	xorBytes1x16(e.L[2][:], e.L[1][:], e.L[3][:]) // L3 = L2+L1
    94  	multBlock(2, &e.L[2], &e.L[4])                // L4 = L2*2
    95  	xorBytes1x16(e.L[4][:], e.L[1][:], e.L[5][:]) // L5 = L4+L1
    96  	multBlock(2, &e.L[3], &e.L[6])                // L6 = L3*2
    97  	xorBytes1x16(e.L[6][:], e.L[1][:], e.L[7][:]) // L7 = L6+L1
    98  
    99  	e.aes = newAes(&extractedKey)
   100  }
   101  
   102  func (e *eState) reset() {
   103  	for i := range e.I {
   104  		memwipe(e.I[i][:])
   105  	}
   106  	for i := range e.J {
   107  		memwipe(e.J[i][:])
   108  	}
   109  	for i := range e.L {
   110  		memwipe(e.L[i][:])
   111  	}
   112  	e.aes.Reset()
   113  }
   114  
   115  func multBlock(x uint, src, dst *[blockSize]byte) {
   116  	var t, r [blockSize]byte
   117  
   118  	copy(t[:], src[:])
   119  	for x != 0 {
   120  		if x&1 != 0 { // This is fine, x isn't data/secret dependent.
   121  			xorBytes1x16(r[:], t[:], r[:])
   122  		}
   123  		doubleBlock(&t)
   124  		x >>= 1
   125  	}
   126  	copy(dst[:], r[:])
   127  
   128  	memwipe(t[:])
   129  	memwipe(r[:])
   130  }
   131  
   132  func doubleBlock(p *[blockSize]byte) {
   133  	tmp := p[0]
   134  	for i := 0; i < 15; i++ {
   135  		p[i] = (p[i] << 1) | (p[i+1] >> 7)
   136  	}
   137  	// p[15] = (p[15] << 1) ^ ((tmp >> 7)?135:0);
   138  	s := subtle.ConstantTimeByteEq(tmp>>7, 1)
   139  	p[15] = (p[15] << 1) ^ byte(subtle.ConstantTimeSelect(s, 135, 0))
   140  }
   141  
// aezHash hashes tau, the nonce and every element of ad, and copies the
// resulting 16 byte value into result.  Encrypt and Decrypt use that value
// as the delta tweak, and pass tau in bits (tau*8).
//
// Each 16 byte block of each input is processed with AES4 under the tweak
// noted in the inline E(j,i) comments, and the outputs are XORed together
// into sum.  A trailing partial block, or an input that is empty
// altogether, is padded with a 0x80 byte followed by zeros and processed
// under index 0.
//
// It panics if len(result) != blockSize.
func (e *eState) aezHash(nonce []byte, ad [][]byte, tau int, result []byte) {
	var buf, sum, I, J [blockSize]byte

	if len(result) != blockSize {
		panic("aez: Hash: len(result)")
	}

	// Initialize sum with hash of tau
	// (tau is encoded big endian into the last 4 bytes of an otherwise
	// all-zero block).
	binary.BigEndian.PutUint32(buf[12:], uint32(tau))
	xorBytes1x16(e.J[0][:], e.J[1][:], J[:])       // J ^ J2
	e.aes.AES4(&J, &e.I[1], &e.L[1], buf[:], &sum) // E(3,1)

	// Hash nonce, accumulate into sum
	empty := len(nonce) == 0
	n := nonce
	nBytes := uint(len(nonce))
	copy(I[:], e.I[1][:])
	for i := uint(1); nBytes >= blockSize; i, nBytes = i+1, nBytes-blockSize {
		e.aes.AES4(&e.J[2], &I, &e.L[i%8], n[:blockSize], &buf) // E(4,i)
		xorBytes1x16(sum[:], buf[:], sum[:])
		n = n[blockSize:]
		// The L index wraps every 8 blocks; I is doubled at that point.
		if i%8 == 0 {
			doubleBlock(&I)
		}
	}
	if nBytes > 0 || empty {
		// Partial (or entirely absent) final block: pad with 0x80, 0x00...
		memwipe(buf[:])
		copy(buf[:], n)
		buf[nBytes] = 0x80
		e.aes.AES4(&e.J[2], &e.I[0], &e.L[0], buf[:], &buf) // E(4,0)
		xorBytes1x16(sum[:], buf[:], sum[:])
	}

	// Hash each vector element, accumulate into sum
	for k, p := range ad {
		empty = len(p) == 0
		bytes := uint(len(p))
		copy(I[:], e.I[1][:])
		// The k-th element uses the J tweak (5+k)*J.
		multBlock(uint(5+k), &e.J[0], &J) // XXX/performance.
		for i := uint(1); bytes >= blockSize; i, bytes = i+1, bytes-blockSize {
			e.aes.AES4(&J, &I, &e.L[i%8], p[:blockSize], &buf) // E(5+k,i)
			xorBytes1x16(sum[:], buf[:], sum[:])
			p = p[blockSize:]
			if i%8 == 0 {
				doubleBlock(&I)
			}
		}
		if bytes > 0 || empty {
			memwipe(buf[:])
			copy(buf[:], p)
			buf[bytes] = 0x80
			e.aes.AES4(&J, &e.I[0], &e.L[0], buf[:], &buf) // E(5+k,0)
			xorBytes1x16(sum[:], buf[:], sum[:])
		}
	}

	// I and J are derived from key material, wipe the local copies.
	memwipe(I[:])
	memwipe(J[:])

	copy(result, sum[:])
}
   203  
   204  func (e *eState) aezPRF(delta *[blockSize]byte, tau int, result []byte) {
   205  	var buf, ctr [blockSize]byte
   206  
   207  	off := 0
   208  	for tau >= blockSize {
   209  		xorBytes1x16(delta[:], ctr[:], buf[:])
   210  		e.aes.AES10(&e.L[3], buf[:], &buf) // E(-1,3)
   211  		copy(result[off:], buf[:])
   212  
   213  		i := 15
   214  		for { // ctr += 1
   215  			ctr[i]++
   216  			i--
   217  			if ctr[i+1] != 0 {
   218  				break
   219  			}
   220  		}
   221  
   222  		tau -= blockSize
   223  		off += blockSize
   224  	}
   225  	if tau > 0 {
   226  		xorBytes1x16(delta[:], ctr[:], buf[:])
   227  		e.aes.AES10(&e.L[3], buf[:], &buf) // E(-1,3)
   228  
   229  		copy(result[off:], buf[:])
   230  	}
   231  
   232  	memwipe(buf[:])
   233  }
   234  
   235  func (e *eState) aezCorePass1Slow(in, out []byte, X *[blockSize]byte, sz int) {
   236  	// NB: The hardware accelerated case is handled prior to this function.
   237  
   238  	// Use one of the portable bitsliced options if possible.
   239  	switch a := e.aes.(type) {
   240  	case *roundB32:
   241  		a.aezCorePass1(e, in, out, X, sz)
   242  	case *roundB64:
   243  		a.aezCorePass1(e, in, out, X, sz)
   244  	default:
   245  		e.aezCorePass1Ref(in, out, X)
   246  	}
   247  }
   248  
   249  func (e *eState) aezCorePass2Slow(in, out []byte, Y, S *[blockSize]byte, sz int) {
   250  	// NB: The hardware accelerated case is handled prior to this function.
   251  
   252  	// Use one of the portable bitsliced options if possible.
   253  	switch a := e.aes.(type) {
   254  	case *roundB32:
   255  		a.aezCorePass2(e, out, Y, S, sz)
   256  	case *roundB64:
   257  		a.aezCorePass2(e, out, Y, S, sz)
   258  	default:
   259  		e.aezCorePass2Ref(in, out, Y, S)
   260  	}
   261  }
   262  
   263  func (e *eState) aezCorePass1Ref(in, out []byte, X *[blockSize]byte) {
   264  	var tmp, I [blockSize]byte
   265  
   266  	copy(I[:], e.I[1][:])
   267  	for i, inBytes := uint(1), len(in); inBytes >= 64; i, inBytes = i+1, inBytes-32 {
   268  		e.aes.AES4(&e.J[0], &I, &e.L[i%8], in[blockSize:blockSize*2], &tmp) // E(1,i)
   269  		xorBytes1x16(in[:], tmp[:], out[:blockSize])
   270  
   271  		e.aes.AES4(&zero, &e.I[0], &e.L[0], out[:blockSize], &tmp) // E(0,0)
   272  		xorBytes1x16(in[blockSize:], tmp[:], out[blockSize:blockSize*2])
   273  		xorBytes1x16(out[blockSize:], X[:], X[:])
   274  
   275  		in, out = in[32:], out[32:]
   276  		if i%8 == 0 {
   277  			doubleBlock(&I)
   278  		}
   279  	}
   280  
   281  	memwipe(tmp[:])
   282  	memwipe(I[:])
   283  }
   284  
   285  func (e *eState) aezCorePass2Ref(in, out []byte, Y, S *[blockSize]byte) {
   286  	var tmp, I [blockSize]byte
   287  
   288  	copy(I[:], e.I[1][:])
   289  	for i, inBytes := uint(1), len(in); inBytes >= 64; i, inBytes = i+1, inBytes-32 {
   290  		e.aes.AES4(&e.J[1], &I, &e.L[i%8], S[:], &tmp) // E(2,i)
   291  		xorBytes1x16(out, tmp[:], out[:blockSize])
   292  		xorBytes1x16(out[blockSize:], tmp[:], out[blockSize:blockSize*2])
   293  		xorBytes1x16(out, Y[:], Y[:])
   294  
   295  		e.aes.AES4(&zero, &e.I[0], &e.L[0], out[blockSize:blockSize*2], &tmp) // E(0,0)
   296  		xorBytes1x16(out, tmp[:], out[:blockSize])
   297  
   298  		e.aes.AES4(&e.J[0], &I, &e.L[i%8], out[:blockSize], &tmp) // E(1,i)
   299  		xorBytes1x16(out[blockSize:], tmp[:], out[blockSize:blockSize*2])
   300  
   301  		swapBlocks(&tmp, out)
   302  
   303  		in, out = in[32:], out[32:]
   304  		if i%8 == 0 {
   305  			doubleBlock(&I)
   306  		}
   307  	}
   308  
   309  	memwipe(I[:])
   310  	memwipe(tmp[:])
   311  }
   312  
   313  func oneZeroPad(src []byte, sz int, dst *[blockSize]byte) {
   314  	memwipe(dst[:])
   315  	copy(dst[:], src[:sz])
   316  	dst[sz] = 0x80
   317  }
   318  
// aezCore enciphers (d == 0) or deciphers (d == 1) in, writing len(in)
// bytes to out.  delta is the tweak produced by aezHash.
//
// encipher and decipher only route inputs of at least 32 bytes here; a
// shorter input makes initialBytes negative and the slicing below panic.
// Encrypt calls this with in and out being the same slice.
//
// The input is treated as:
//   - initialBytes of leading block pairs, handled by the two passes,
//   - an optional fragment of fragBytes (< 32) bytes, and
//   - the final 32 bytes, which are processed last and whose tweak
//     indices depend on d.
func (e *eState) aezCore(delta *[blockSize]byte, in []byte, d uint, out []byte) {
	var tmp, X, Y, S [blockSize]byte
	outOrig, inOrig := out, in

	fragBytes := len(in) % 32
	initialBytes := len(in) - fragBytes - 32

	// Compute X and store intermediate results
	// Pass 1 over in[0:-32], store intermediate values in out[0:-32]
	if len(in) >= 64 {
		e.aezCorePass1(in, out, &X, initialBytes)
	}

	// Finish X calculation
	// (the fragment is one full block plus a padded partial block, or just
	// a padded partial block).
	in = in[initialBytes:]
	if fragBytes >= blockSize {
		e.aes.AES4(&zero, &e.I[1], &e.L[4], in[:blockSize], &tmp) // E(0,4)
		xorBytes1x16(X[:], tmp[:], X[:])
		oneZeroPad(in[blockSize:], fragBytes-blockSize, &tmp)
		e.aes.AES4(&zero, &e.I[1], &e.L[5], tmp[:], &tmp) // E(0,5)
		xorBytes1x16(X[:], tmp[:], X[:])
	} else if fragBytes > 0 {
		oneZeroPad(in, fragBytes, &tmp)
		e.aes.AES4(&zero, &e.I[1], &e.L[4], tmp[:], &tmp) // E(0,4)
		xorBytes1x16(X[:], tmp[:], X[:])
	}

	// Calculate S
	// (from the last 32 bytes of the input, X and delta).
	out, in = outOrig[len(inOrig)-32:], inOrig[len(inOrig)-32:]
	e.aes.AES4(&zero, &e.I[1], &e.L[(1+d)%8], in[blockSize:2*blockSize], &tmp) // E(0,1+d)
	xorBytes4x16(X[:], in[:], delta[:], tmp[:], out[:blockSize])
	e.aes.AES10(&e.L[(1+d)%8], out[:blockSize], &tmp) // E(-1,1+d)
	xorBytes1x16(in[blockSize:], tmp[:], out[blockSize:blockSize*2])
	xorBytes1x16(out, out[blockSize:], S[:])
	// XXX/performance: Early abort if tag is corrupted.

	// Pass 2 over intermediate values in out[32..]. Final values written
	out, in = outOrig, inOrig
	if len(in) >= 64 {
		e.aezCorePass2(in, out, &Y, &S, initialBytes)
	}

	// Finish Y calculation and finish encryption of fragment bytes
	out, in = out[initialBytes:], in[initialBytes:]
	if fragBytes >= blockSize {
		e.aes.AES10(&e.L[4], S[:], &tmp) // E(-1,4)
		xorBytes1x16(in, tmp[:], out[:blockSize])
		e.aes.AES4(&zero, &e.I[1], &e.L[4], out[:blockSize], &tmp) // E(0,4)
		xorBytes1x16(Y[:], tmp[:], Y[:])

		out, in = out[blockSize:], in[blockSize:]
		fragBytes -= blockSize

		e.aes.AES10(&e.L[5], S[:], &tmp)      // E(-1,5)
		xorBytes(in, tmp[:], tmp[:fragBytes]) // non-16 byte xorBytes()
		copy(out, tmp[:fragBytes])
		// Re-pad the output fragment (0x80, 0x00...) before folding it
		// into Y.
		memwipe(tmp[fragBytes:])
		tmp[fragBytes] = 0x80
		e.aes.AES4(&zero, &e.I[1], &e.L[5], tmp[:], &tmp) // E(0,5)
		xorBytes1x16(Y[:], tmp[:], Y[:])
	} else if fragBytes > 0 {
		e.aes.AES10(&e.L[4], S[:], &tmp)      // E(-1,4)
		xorBytes(in, tmp[:], tmp[:fragBytes]) // non-16 byte xorBytes()
		copy(out, tmp[:fragBytes])
		memwipe(tmp[fragBytes:])
		tmp[fragBytes] = 0x80
		e.aes.AES4(&zero, &e.I[1], &e.L[4], tmp[:], &tmp) // E(0,4)
		xorBytes1x16(Y[:], tmp[:], Y[:])
	}

	// Finish encryption of last two blocks
	out = outOrig[len(inOrig)-32:]
	e.aes.AES10(&e.L[(2-d)%8], out[blockSize:], &tmp) // E(-1,2-d)
	xorBytes1x16(out, tmp[:], out[:blockSize])
	e.aes.AES4(&zero, &e.I[1], &e.L[(2-d)%8], out[:blockSize], &tmp) // E(0,2-d)
	xorBytes4x16(tmp[:], out[blockSize:], delta[:], Y[:], out[blockSize:])
	// Swap the two final blocks, using tmp as scratch space.
	copy(tmp[:], out[:blockSize])
	copy(out[:blockSize], out[blockSize:])
	copy(out[blockSize:], tmp[:])

	memwipe(X[:])
	memwipe(Y[:])
	memwipe(S[:])
}
   403  
// aezTiny enciphers (d == 0) or deciphers (d != 0) in, writing len(in)
// bytes to out.  delta is the tweak produced by aezHash.  encipher and
// decipher route non-empty inputs shorter than 32 bytes here.
//
// The input is split into two halves L and R (each ending in a nibble when
// len(in) is odd), which are then alternately updated for a number of
// rounds that depends on the input length.  Deciphering runs the round
// counter j in the opposite direction.
func (e *eState) aezTiny(delta *[blockSize]byte, in []byte, d uint, out []byte) {
	var rounds, i, j uint
	var buf [2 * blockSize]byte
	var L, R [blockSize]byte
	var step int
	mask, pad := byte(0x00), byte(0x80)
	defer memwipe(L[:])
	defer memwipe(R[:])

	var tmp [16]byte

	// Pick the round count and the L tweak index (i) from the input
	// length: shorter inputs get more rounds.
	i = 7
	inBytes := len(in)
	if inBytes == 1 {
		rounds = 24
	} else if inBytes == 2 {
		rounds = 16
	} else if inBytes < 16 {
		rounds = 10
	} else {
		i, rounds = 6, 8
	}

	// Split (inbytes*8)/2 bits into L and R. Beware: May end in nibble.
	copy(L[:], in[:(inBytes+1)/2])
	copy(R[:], in[inBytes/2:inBytes/2+(inBytes+1)/2])
	if inBytes&1 != 0 { // Must shift R left by half a byte
		for k := uint(0); k < uint(inBytes/2); k++ {
			R[k] = (R[k] << 4) | (R[k+1] >> 4)
		}
		R[inBytes/2] = R[inBytes/2] << 4
		pad = 0x08
		mask = 0xf0
	}
	if d != 0 {
		if inBytes < 16 {
			// Deciphering a sub-block input: undo the top bit
			// adjustment that enciphering applies as its last step
			// (see the end of this function).
			memwipe(buf[:blockSize])
			copy(buf[:], in)
			buf[0] |= 0x80
			xorBytes1x16(delta[:], buf[:], buf[:blockSize])
			e.aes.AES4(&zero, &e.I[1], &e.L[3], buf[:blockSize], &tmp) // E(0,3)
			L[0] ^= (tmp[0] & 0x80)
		}
		// Run the rounds backwards.
		j, step = rounds-1, -1
	} else {
		step = 1
	}
	// Each iteration performs two rounds: one updating L from R, and one
	// updating R from L.
	for k := uint(0); k < rounds/2; k, j = k+1, uint(int(j)+2*step) {
		memwipe(buf[:blockSize])
		copy(buf[:], R[:(inBytes+1)/2])
		buf[inBytes/2] = (buf[inBytes/2] & mask) | pad
		xorBytes1x16(buf[:], delta[:], buf[:blockSize])
		buf[15] ^= byte(j)
		e.aes.AES4(&zero, &e.I[1], &e.L[i], buf[:blockSize], &tmp) // E(0,i)
		xorBytes1x16(L[:], tmp[:], L[:blockSize])

		memwipe(buf[:blockSize])
		copy(buf[:], L[:(inBytes+1)/2])
		buf[inBytes/2] = (buf[inBytes/2] & mask) | pad
		xorBytes1x16(buf[:], delta[:], buf[:blockSize])
		buf[15] ^= byte(int(j) + step)
		e.aes.AES4(&zero, &e.I[1], &e.L[i], buf[:blockSize], &tmp) // E(0,i)
		xorBytes1x16(R[:], tmp[:], R[:blockSize])
	}
	// Reassemble the output as R followed by L.
	copy(buf[:], R[:inBytes/2])
	copy(buf[inBytes/2:], L[:(inBytes+1)/2])
	if inBytes&1 != 0 {
		// Odd length: shift the L part right by a nibble and merge the
		// nibbles that share the middle byte.
		for k := inBytes - 1; k > inBytes/2; k-- {
			buf[k] = (buf[k] >> 4) | (buf[k-1] << 4)
		}
		buf[inBytes/2] = (L[0] >> 4) | (R[inBytes/2] & 0xf0)
	}
	copy(out, buf[:inBytes])
	if inBytes < 16 && d == 0 {
		// Enciphering a sub-block input: conditionally flip the top bit
		// of the output; deciphering undoes this before its rounds.
		memwipe(buf[inBytes:blockSize])
		buf[0] |= 0x80
		xorBytes1x16(delta[:], buf[:], buf[:blockSize])
		e.aes.AES4(&zero, &e.I[1], &e.L[3], buf[:blockSize], &tmp) // E(0,3)
		out[0] ^= tmp[0] & 0x80
	}

	memwipe(tmp[:])
}
   487  
   488  func (e *eState) encipher(delta *[blockSize]byte, in, out []byte) {
   489  	if len(in) == 0 {
   490  		return
   491  	}
   492  
   493  	if len(in) < 32 {
   494  		e.aezTiny(delta, in, 0, out)
   495  	} else {
   496  		e.aezCore(delta, in, 0, out)
   497  	}
   498  }
   499  
   500  func (e *eState) decipher(delta *[blockSize]byte, in, out []byte) {
   501  	if len(in) == 0 {
   502  		return
   503  	}
   504  
   505  	if len(in) < 32 {
   506  		e.aezTiny(delta, in, 1, out)
   507  	} else {
   508  		e.aezCore(delta, in, 1, out)
   509  	}
   510  }
   511  
   512  // Encrypt encrypts and authenticates the plaintext, authenticates the
   513  // additional data, and appends the result to ciphertext, returning the
   514  // updated slice.  The length of the authentication tag in bytes is specified
   515  // by tau.  The plaintext and dst slices MUST NOT overlap.
   516  func Encrypt(key []byte, nonce []byte, additionalData [][]byte, tau int, plaintext, dst []byte) []byte {
   517  	var delta [blockSize]byte
   518  
   519  	var x []byte
   520  	dstSz, xSz := len(dst), len(plaintext)+tau
   521  	if cap(dst) >= dstSz+xSz {
   522  		dst = dst[:dstSz+xSz]
   523  	} else {
   524  		x = make([]byte, dstSz+xSz)
   525  		copy(x, dst)
   526  		dst = x
   527  	}
   528  	x = dst[dstSz:]
   529  
   530  	var e eState
   531  	defer e.reset()
   532  
   533  	e.init(key)
   534  	e.aezHash(nonce, additionalData, tau*8, delta[:])
   535  	if len(plaintext) == 0 {
   536  		e.aezPRF(&delta, tau, x)
   537  	} else {
   538  		memwipe(x[len(plaintext):])
   539  		copy(x, plaintext)
   540  		e.encipher(&delta, x, x)
   541  	}
   542  
   543  	return dst
   544  }
   545  
   546  // Decrypt decrypts and authenticates the ciphertext, authenticates the
   547  // additional data, and if successful appends the resulting plaintext to the
   548  // provided slice and returns the updated slice and true.  The length of the
   549  // expected authentication tag in bytes is specified by tau.  The ciphertext
   550  // and dst slices MUST NOT overlap.
   551  func Decrypt(key []byte, nonce []byte, additionalData [][]byte, tau int, ciphertext, dst []byte) ([]byte, bool) {
   552  	var delta [blockSize]byte
   553  	sum := byte(0)
   554  
   555  	if len(ciphertext) < tau {
   556  		return nil, false
   557  	}
   558  
   559  	var x []byte
   560  	dstSz, xSz := len(dst), len(ciphertext)
   561  	if cap(dst) >= dstSz+xSz {
   562  		dst = dst[:dstSz+xSz]
   563  	} else {
   564  		x = make([]byte, dstSz+xSz)
   565  		copy(x, dst)
   566  		dst = x
   567  	}
   568  	x = dst[dstSz:]
   569  
   570  	var e eState
   571  	defer e.reset()
   572  
   573  	e.init(key)
   574  	e.aezHash(nonce, additionalData, tau*8, delta[:])
   575  	if len(ciphertext) == tau {
   576  		e.aezPRF(&delta, tau, x)
   577  		for i := 0; i < tau; i++ {
   578  			sum |= x[i] ^ ciphertext[i]
   579  		}
   580  		dst = dst[:dstSz]
   581  	} else {
   582  		e.decipher(&delta, ciphertext, x)
   583  		for i := 0; i < tau; i++ {
   584  			sum |= x[len(ciphertext)-tau+i]
   585  		}
   586  		if sum == 0 {
   587  			dst = dst[:dstSz+len(ciphertext)-tau]
   588  		}
   589  	}
   590  	if sum != 0 { // return true if valid, false if invalid
   591  		return nil, false
   592  	}
   593  	return dst, true
   594  }
   595  
// IsHardwareAccelerated returns true iff the AEZ implementation will use
// hardware acceleration (e.g. AES-NI).  It reports the package-level
// isHardwareAccelerated flag.
func IsHardwareAccelerated() bool {
	return isHardwareAccelerated
}
   601  
   602  func memwipe(b []byte) {
   603  	for i := range b {
   604  		b[i] = 0
   605  	}
   606  }
   607  
   608  func xorBytes(a, b, dst []byte) {
   609  	if len(a) < len(dst) || len(b) < len(dst) {
   610  		panic("aez: xorBytes: len")
   611  	}
   612  	for i := 0; i < len(dst); i++ {
   613  		dst[i] = a[i] ^ b[i]
   614  	}
   615  }
   616  
   617  func swapBlocks(tmp *[blockSize]byte, b []byte) {
   618  	copy(tmp[:], b[:])
   619  	copy(b[:blockSize], b[blockSize:])
   620  	copy(b[blockSize:], tmp[:])
   621  }
   622  
   623  func init() {
   624  	// Pick the correct bitsliced round function based on target.
   625  	//
   626  	// Fucking appengine doesn't have `unsafe`, so derive based off uintptr.
   627  	// It's retarded that this isn't a constant in runtime or something.
   628  	maxUintptr := uint64(^uintptr(0))
   629  	switch maxUintptr {
   630  	case math.MaxUint32:
   631  		newAes = newRoundB32
   632  	case math.MaxUint64:
   633  		newAes = newRoundB64
   634  	default:
   635  		panic("aez/init: unsupported pointer size")
   636  	}
   637  
   638  	// Attempt to detect hardware acceleration.
   639  	platformInit()
   640  }