github.com/mad-day/Yawning-crypto@v0.0.0-20190711051033-5a5f8cca32ec/bsaes/ct32/aes_ct32.go (about)

     1  // Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
     2  // Copyright (c) 2017 Yawning Angel <yawning at schwanenlied dot me>
     3  //
     4  // Permission is hereby granted, free of charge, to any person obtaining
     5  // a copy of this software and associated documentation files (the
     6  // "Software"), to deal in the Software without restriction, including
     7  // without limitation the rights to use, copy, modify, merge, publish,
     8  // distribute, sublicense, and/or sell copies of the Software, and to
     9  // permit persons to whom the Software is furnished to do so, subject to
    10  // the following conditions:
    11  //
    12  // The above copyright notice and this permission notice shall be
    13  // included in all copies or substantial portions of the Software.
    14  //
    15  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    16  // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    17  // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    18  // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
    19  // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
    20  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    21  // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    22  // SOFTWARE.
    23  
    24  // Package ct32 is a 32 bit optimized AES implementation that processes 2
    25  // blocks at a time.
    26  package ct32
    27  
    28  import (
    29  	"crypto/cipher"
    30  	"encoding/binary"
    31  
    32  	"github.com/mad-day/Yawning-crypto/bsaes/internal/modes"
    33  )
    34  
// rcon holds the ten round constants that Keysched XORs into the
// substituted key word at the start of each key schedule iteration.
var rcon = [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36}
    36  
    37  func Sbox(q *[8]uint32) {
    38  	// This S-box implementation is a straightforward translation of
    39  	// the circuit described by Boyar and Peralta in "A new
    40  	// combinational logic minimization technique with applications
    41  	// to cryptology" (https://eprint.iacr.org/2009/191.pdf).
    42  	//
    43  	// Note that variables x* (input) and s* (output) are numbered
    44  	// in "reverse" order (x0 is the high bit, x7 is the low bit).
    45  
    46  	var (
    47  		x0, x1, x2, x3, x4, x5, x6, x7                   uint32
    48  		y1, y2, y3, y4, y5, y6, y7, y8, y9               uint32
    49  		y10, y11, y12, y13, y14, y15, y16, y17, y18, y19 uint32
    50  		y20, y21                                         uint32
    51  		z0, z1, z2, z3, z4, z5, z6, z7, z8, z9           uint32
    52  		z10, z11, z12, z13, z14, z15, z16, z17           uint32
    53  		t0, t1, t2, t3, t4, t5, t6, t7, t8, t9           uint32
    54  		t10, t11, t12, t13, t14, t15, t16, t17, t18, t19 uint32
    55  		t20, t21, t22, t23, t24, t25, t26, t27, t28, t29 uint32
    56  		t30, t31, t32, t33, t34, t35, t36, t37, t38, t39 uint32
    57  		t40, t41, t42, t43, t44, t45, t46, t47, t48, t49 uint32
    58  		t50, t51, t52, t53, t54, t55, t56, t57, t58, t59 uint32
    59  		t60, t61, t62, t63, t64, t65, t66, t67           uint32
    60  		s0, s1, s2, s3, s4, s5, s6, s7                   uint32
    61  	)
    62  
    63  	x0 = q[7]
    64  	x1 = q[6]
    65  	x2 = q[5]
    66  	x3 = q[4]
    67  	x4 = q[3]
    68  	x5 = q[2]
    69  	x6 = q[1]
    70  	x7 = q[0]
    71  
    72  	//
    73  	// Top linear transformation.
    74  	//
    75  	y14 = x3 ^ x5
    76  	y13 = x0 ^ x6
    77  	y9 = x0 ^ x3
    78  	y8 = x0 ^ x5
    79  	t0 = x1 ^ x2
    80  	y1 = t0 ^ x7
    81  	y4 = y1 ^ x3
    82  	y12 = y13 ^ y14
    83  	y2 = y1 ^ x0
    84  	y5 = y1 ^ x6
    85  	y3 = y5 ^ y8
    86  	t1 = x4 ^ y12
    87  	y15 = t1 ^ x5
    88  	y20 = t1 ^ x1
    89  	y6 = y15 ^ x7
    90  	y10 = y15 ^ t0
    91  	y11 = y20 ^ y9
    92  	y7 = x7 ^ y11
    93  	y17 = y10 ^ y11
    94  	y19 = y10 ^ y8
    95  	y16 = t0 ^ y11
    96  	y21 = y13 ^ y16
    97  	y18 = x0 ^ y16
    98  
    99  	//
   100  	// Non-linear section.
   101  	//
   102  	t2 = y12 & y15
   103  	t3 = y3 & y6
   104  	t4 = t3 ^ t2
   105  	t5 = y4 & x7
   106  	t6 = t5 ^ t2
   107  	t7 = y13 & y16
   108  	t8 = y5 & y1
   109  	t9 = t8 ^ t7
   110  	t10 = y2 & y7
   111  	t11 = t10 ^ t7
   112  	t12 = y9 & y11
   113  	t13 = y14 & y17
   114  	t14 = t13 ^ t12
   115  	t15 = y8 & y10
   116  	t16 = t15 ^ t12
   117  	t17 = t4 ^ t14
   118  	t18 = t6 ^ t16
   119  	t19 = t9 ^ t14
   120  	t20 = t11 ^ t16
   121  	t21 = t17 ^ y20
   122  	t22 = t18 ^ y19
   123  	t23 = t19 ^ y21
   124  	t24 = t20 ^ y18
   125  
   126  	t25 = t21 ^ t22
   127  	t26 = t21 & t23
   128  	t27 = t24 ^ t26
   129  	t28 = t25 & t27
   130  	t29 = t28 ^ t22
   131  	t30 = t23 ^ t24
   132  	t31 = t22 ^ t26
   133  	t32 = t31 & t30
   134  	t33 = t32 ^ t24
   135  	t34 = t23 ^ t33
   136  	t35 = t27 ^ t33
   137  	t36 = t24 & t35
   138  	t37 = t36 ^ t34
   139  	t38 = t27 ^ t36
   140  	t39 = t29 & t38
   141  	t40 = t25 ^ t39
   142  
   143  	t41 = t40 ^ t37
   144  	t42 = t29 ^ t33
   145  	t43 = t29 ^ t40
   146  	t44 = t33 ^ t37
   147  	t45 = t42 ^ t41
   148  	z0 = t44 & y15
   149  	z1 = t37 & y6
   150  	z2 = t33 & x7
   151  	z3 = t43 & y16
   152  	z4 = t40 & y1
   153  	z5 = t29 & y7
   154  	z6 = t42 & y11
   155  	z7 = t45 & y17
   156  	z8 = t41 & y10
   157  	z9 = t44 & y12
   158  	z10 = t37 & y3
   159  	z11 = t33 & y4
   160  	z12 = t43 & y13
   161  	z13 = t40 & y5
   162  	z14 = t29 & y2
   163  	z15 = t42 & y9
   164  	z16 = t45 & y14
   165  	z17 = t41 & y8
   166  
   167  	//
   168  	// Bottom linear transformation.
   169  	//
   170  	t46 = z15 ^ z16
   171  	t47 = z10 ^ z11
   172  	t48 = z5 ^ z13
   173  	t49 = z9 ^ z10
   174  	t50 = z2 ^ z12
   175  	t51 = z2 ^ z5
   176  	t52 = z7 ^ z8
   177  	t53 = z0 ^ z3
   178  	t54 = z6 ^ z7
   179  	t55 = z16 ^ z17
   180  	t56 = z12 ^ t48
   181  	t57 = t50 ^ t53
   182  	t58 = z4 ^ t46
   183  	t59 = z3 ^ t54
   184  	t60 = t46 ^ t57
   185  	t61 = z14 ^ t57
   186  	t62 = t52 ^ t58
   187  	t63 = t49 ^ t58
   188  	t64 = z4 ^ t59
   189  	t65 = t61 ^ t62
   190  	t66 = z1 ^ t63
   191  	s0 = t59 ^ t63
   192  	s6 = t56 ^ (^t62)
   193  	s7 = t48 ^ (^t60)
   194  	t67 = t64 ^ t65
   195  	s3 = t53 ^ t66
   196  	s4 = t51 ^ t66
   197  	s5 = t47 ^ t65
   198  	s1 = t64 ^ (^s3)
   199  	s2 = t55 ^ (^t67)
   200  
   201  	q[7] = s0
   202  	q[6] = s1
   203  	q[5] = s2
   204  	q[4] = s3
   205  	q[3] = s4
   206  	q[2] = s5
   207  	q[1] = s6
   208  	q[0] = s7
   209  }
   210  
   211  func Ortho(q []uint32) {
   212  	_ = q[7] // Early bounds check.
   213  
   214  	const cl2, ch2 = 0x55555555, 0xAAAAAAAA
   215  	q[0], q[1] = (q[0]&cl2)|((q[1]&cl2)<<1), ((q[0]&ch2)>>1)|(q[1]&ch2)
   216  	q[2], q[3] = (q[2]&cl2)|((q[3]&cl2)<<1), ((q[2]&ch2)>>1)|(q[3]&ch2)
   217  	q[4], q[5] = (q[4]&cl2)|((q[5]&cl2)<<1), ((q[4]&ch2)>>1)|(q[5]&ch2)
   218  	q[6], q[7] = (q[6]&cl2)|((q[7]&cl2)<<1), ((q[6]&ch2)>>1)|(q[7]&ch2)
   219  
   220  	const cl4, ch4 = 0x33333333, 0xCCCCCCCC
   221  	q[0], q[2] = (q[0]&cl4)|((q[2]&cl4)<<2), ((q[0]&ch4)>>2)|(q[2]&ch4)
   222  	q[1], q[3] = (q[1]&cl4)|((q[3]&cl4)<<2), ((q[1]&ch4)>>2)|(q[3]&ch4)
   223  	q[4], q[6] = (q[4]&cl4)|((q[6]&cl4)<<2), ((q[4]&ch4)>>2)|(q[6]&ch4)
   224  	q[5], q[7] = (q[5]&cl4)|((q[7]&cl4)<<2), ((q[5]&ch4)>>2)|(q[7]&ch4)
   225  
   226  	const cl8, ch8 = 0x0F0F0F0F, 0xF0F0F0F0
   227  	q[0], q[4] = (q[0]&cl8)|((q[4]&cl8)<<4), ((q[0]&ch8)>>4)|(q[4]&ch8)
   228  	q[1], q[5] = (q[1]&cl8)|((q[5]&cl8)<<4), ((q[1]&ch8)>>4)|(q[5]&ch8)
   229  	q[2], q[6] = (q[2]&cl8)|((q[6]&cl8)<<4), ((q[2]&ch8)>>4)|(q[6]&ch8)
   230  	q[3], q[7] = (q[3]&cl8)|((q[7]&cl8)<<4), ((q[3]&ch8)>>4)|(q[7]&ch8)
   231  }
   232  
   233  func AddRoundKey(q *[8]uint32, sk []uint32) {
   234  	_ = sk[7] // Early bounds check.
   235  
   236  	q[0] ^= sk[0]
   237  	q[1] ^= sk[1]
   238  	q[2] ^= sk[2]
   239  	q[3] ^= sk[3]
   240  	q[4] ^= sk[4]
   241  	q[5] ^= sk[5]
   242  	q[6] ^= sk[6]
   243  	q[7] ^= sk[7]
   244  }
   245  
   246  func subWord(x uint32) uint32 {
   247  	var q [8]uint32
   248  
   249  	for i := range q {
   250  		q[i] = x
   251  	}
   252  	Ortho(q[:])
   253  	Sbox(&q)
   254  	Ortho(q[:])
   255  	x = q[0]
   256  	memwipeU32(q[:])
   257  	return x
   258  }
   259  
   260  func Keysched(compSkey []uint32, key []byte) int {
   261  	numRounds := 0
   262  	keyLen := len(key)
   263  	switch keyLen {
   264  	case 16:
   265  		numRounds = 10
   266  	case 24:
   267  		numRounds = 12
   268  	case 32:
   269  		numRounds = 14
   270  	default:
   271  		panic("aes/impl32: Keysched: invalid key length")
   272  	}
   273  
   274  	var skey [120]uint32
   275  	var tmp uint32
   276  	nk := keyLen >> 2
   277  	nkf := (numRounds + 1) << 2
   278  	for i := 0; i < nk; i++ {
   279  		tmp = binary.LittleEndian.Uint32(key[i<<2:])
   280  		skey[(i<<1)+0] = tmp
   281  		skey[(i<<1)+1] = tmp
   282  	}
   283  	for i, j, k := nk, 0, 0; i < nkf; i++ {
   284  		if j == 0 {
   285  			tmp = (tmp << 24) | (tmp >> 8)
   286  			tmp = subWord(tmp) ^ uint32(rcon[k])
   287  		} else if nk > 6 && j == 4 {
   288  			tmp = subWord(tmp)
   289  		}
   290  		tmp ^= skey[(i-nk)<<1]
   291  		skey[(i<<1)+0] = tmp
   292  		skey[(i<<1)+1] = tmp
   293  		if j++; j == nk {
   294  			j = 0
   295  			k++
   296  		}
   297  	}
   298  	for i := 0; i < nkf; i += 4 {
   299  		Ortho(skey[i<<1:])
   300  	}
   301  	for i, j := 0, 0; i < nkf; i, j = i+1, j+2 {
   302  		compSkey[i] = (skey[j+0] & 0x55555555) | (skey[j+1] & 0xAAAAAAAA)
   303  	}
   304  
   305  	memwipeU32(skey[:])
   306  
   307  	return numRounds
   308  }
   309  
   310  func SkeyExpand(skey []uint32, numRounds int, compSkey []uint32) {
   311  	n := (numRounds + 1) << 2
   312  	for u, v := 0, 0; u < n; u, v = u+1, v+2 {
   313  		x := compSkey[u]
   314  		y := compSkey[u]
   315  
   316  		x &= 0x55555555
   317  		skey[v+0] = x | (x << 1)
   318  		y &= 0xAAAAAAAA
   319  		skey[v+1] = y | (y >> 1)
   320  	}
   321  }
   322  
   323  func RkeyOrtho(q []uint32, key []byte) {
   324  	for i := 0; i < 4; i++ {
   325  		x := binary.LittleEndian.Uint32(key[i<<2:])
   326  		q[(i<<1)+0] = x
   327  		q[(i<<1)+1] = x
   328  	}
   329  	Ortho(q[:])
   330  	for i, j := 0, 0; i < 4; i, j = i+1, j+2 {
   331  		x := (q[j+0] & 0x55555555) | (q[j+1] & 0xAAAAAAAA)
   332  		y := x
   333  
   334  		x &= 0x55555555
   335  		q[j+0] = x | (x << 1)
   336  		y &= 0xAAAAAAAA
   337  		q[j+1] = y | (y >> 1)
   338  	}
   339  }
   340  
   341  func Load4xU32(q *[8]uint32, src []byte) {
   342  	q[0] = binary.LittleEndian.Uint32(src[:])
   343  	q[2] = binary.LittleEndian.Uint32(src[4:])
   344  	q[4] = binary.LittleEndian.Uint32(src[8:])
   345  	q[6] = binary.LittleEndian.Uint32(src[12:])
   346  	q[1] = 0
   347  	q[3] = 0
   348  	q[5] = 0
   349  	q[7] = 0
   350  	Ortho(q[:])
   351  }
   352  
   353  func Load8xU32(q *[8]uint32, src0, src1 []byte) {
   354  	src := [][]byte{src0, src1}
   355  	for i, s := range src {
   356  		q[i] = binary.LittleEndian.Uint32(s[:])
   357  		q[i+2] = binary.LittleEndian.Uint32(s[4:])
   358  		q[i+4] = binary.LittleEndian.Uint32(s[8:])
   359  		q[i+6] = binary.LittleEndian.Uint32(s[12:])
   360  	}
   361  	Ortho(q[:])
   362  }
   363  
   364  func Store4xU32(dst []byte, q *[8]uint32) {
   365  	Ortho(q[:])
   366  	binary.LittleEndian.PutUint32(dst[:], q[0])
   367  	binary.LittleEndian.PutUint32(dst[4:], q[2])
   368  	binary.LittleEndian.PutUint32(dst[8:], q[4])
   369  	binary.LittleEndian.PutUint32(dst[12:], q[6])
   370  }
   371  
   372  func Store8xU32(dst0, dst1 []byte, q *[8]uint32) {
   373  	Ortho(q[:])
   374  	dst := [][]byte{dst0, dst1}
   375  	for i, d := range dst {
   376  		binary.LittleEndian.PutUint32(d[:], q[i])
   377  		binary.LittleEndian.PutUint32(d[4:], q[i+2])
   378  		binary.LittleEndian.PutUint32(d[8:], q[i+4])
   379  		binary.LittleEndian.PutUint32(d[12:], q[i+6])
   380  	}
   381  }
   382  
   383  func rotr16(x uint32) uint32 {
   384  	return (x << 16) | (x >> 16)
   385  }
   386  
   387  func memwipeU32(s []uint32) {
   388  	for i := range s {
   389  		s[i] = 0
   390  	}
   391  }
   392  
// block is the cipher.Block implementation returned by NewCipher.
type block struct {
	// Embedded helper, initialised with the block itself in NewCipher.
	// NOTE(review): presumably supplies the block cipher mode
	// constructors; confirm against the internal modes package.
	modes.BlockModesImpl

	skExp     [120]uint32 // Expanded round keys filled in by SkeyExpand; zeroed by Reset.
	numRounds int         // Round count returned by Keysched.
	wasReset  bool        // Set by Reset; the encrypt/decrypt methods panic once it is true.
}
   400  
   401  func (b *block) BlockSize() int {
   402  	return 16
   403  }
   404  
   405  func (b *block) Stride() int {
   406  	return 2
   407  }
   408  
   409  func (b *block) Encrypt(dst, src []byte) {
   410  	var q [8]uint32
   411  
   412  	if b.wasReset {
   413  		panic("bsaes/ct32: Encrypt() called after Reset()")
   414  	}
   415  
   416  	Load4xU32(&q, src)
   417  	encrypt(b.numRounds, b.skExp[:], &q)
   418  	Store4xU32(dst, &q)
   419  }
   420  
   421  func (b *block) Decrypt(dst, src []byte) {
   422  	var q [8]uint32
   423  
   424  	if b.wasReset {
   425  		panic("bsaes/ct32: Decrypt() called after Reset()")
   426  	}
   427  
   428  	Load4xU32(&q, src)
   429  	decrypt(b.numRounds, b.skExp[:], &q)
   430  	Store4xU32(dst, &q)
   431  }
   432  
   433  func (b *block) BulkEncrypt(dst, src []byte) {
   434  	var q [8]uint32
   435  
   436  	if b.wasReset {
   437  		panic("bsaes/ct32: BulkEncrypt() called after Reset()")
   438  	}
   439  
   440  	Load8xU32(&q, src[0:], src[16:])
   441  	encrypt(b.numRounds, b.skExp[:], &q)
   442  	Store8xU32(dst[0:], dst[16:], &q)
   443  }
   444  
   445  func (b *block) BulkDecrypt(dst, src []byte) {
   446  	var q [8]uint32
   447  
   448  	if b.wasReset {
   449  		panic("bsaes/ct32: BulkDecrypt() called after Reset()")
   450  	}
   451  
   452  	Load8xU32(&q, src[0:], src[16:])
   453  	decrypt(b.numRounds, b.skExp[:], &q)
   454  	Store8xU32(dst[0:], dst[16:], &q)
   455  }
   456  
   457  func (b *block) Reset() {
   458  	if !b.wasReset {
   459  		b.wasReset = true
   460  		memwipeU32(b.skExp[:])
   461  	}
   462  }
   463  
   464  // NewCipher creates and returns a new cipher.Block, backed by a Impl32.
   465  func NewCipher(key []byte) cipher.Block {
   466  	var skey [60]uint32
   467  	defer memwipeU32(skey[:])
   468  
   469  	b := new(block)
   470  	b.numRounds = Keysched(skey[:], key)
   471  	SkeyExpand(b.skExp[:], b.numRounds, skey[:])
   472  
   473  	b.BlockModesImpl.Init(b)
   474  
   475  	return b
   476  }