github.com/mad-day/Yawning-crypto@v0.0.0-20190711051033-5a5f8cca32ec/aez/round_bitsliced32.go (about)

     1  // round_bitsliced32.go - 32 bit Constant time AES round function.
     2  //
     3  // To the extent possible under law, Yawning Angel has waived all copyright
     4  // and related or neighboring rights to aez, using the Creative
     5  // Commons "CC0" public domain dedication. See LICENSE or
     6  // <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
     7  
     8  package aez
     9  
    10  import "github.com/mad-day/Yawning-crypto/bsaes/ct32"
    11  
    12  type roundB32 struct {
    13  	skey [32]uint32 // I, J, L, 0
    14  }
    15  
    16  func newRoundB32(extractedKey *[extractedKeySize]byte) aesImpl {
    17  	r := new(roundB32)
    18  	for i := 0; i < 3; i++ {
    19  		ct32.RkeyOrtho(r.skey[i*8:], extractedKey[i*16:])
    20  	}
    21  
    22  	return r
    23  }
    24  
    25  func (r *roundB32) Reset() {
    26  	memwipeU32(r.skey[:])
    27  }
    28  
    29  func (r *roundB32) AES4(j, i, l *[blockSize]byte, src []byte, dst *[blockSize]byte) {
    30  	var q [8]uint32
    31  	xorBytes4x16(j[:], i[:], l[:], src, dst[:])
    32  
    33  	ct32.Load4xU32(&q, dst[:])
    34  	r.round(&q, r.skey[8:])  // J
    35  	r.round(&q, r.skey[0:])  // I
    36  	r.round(&q, r.skey[16:]) // L
    37  	r.round(&q, r.skey[24:]) // zero
    38  	ct32.Store4xU32(dst[:], &q)
    39  
    40  	memwipeU32(q[:])
    41  }
    42  
    43  func (r *roundB32) aes4x2(
    44  	j0, i0, l0 *[blockSize]byte, src0 []byte, dst0 *[blockSize]byte,
    45  	j1, i1, l1 *[blockSize]byte, src1 []byte, dst1 *[blockSize]byte) {
    46  	// XXX/performance: Fairly sure i, src, and dst are the only things
    47  	// that are ever different here so XORs can be pruned.
    48  
    49  	var q [8]uint32
    50  	xorBytes4x16(j0[:], i0[:], l0[:], src0, dst0[:])
    51  	xorBytes4x16(j1[:], i1[:], l1[:], src1, dst1[:])
    52  
    53  	ct32.Load8xU32(&q, dst0[:], dst1[:])
    54  	r.round(&q, r.skey[8:])  // J
    55  	r.round(&q, r.skey[0:])  // I
    56  	r.round(&q, r.skey[16:]) // L
    57  	r.round(&q, r.skey[24:]) // zero
    58  	ct32.Store8xU32(dst0[:], dst1[:], &q)
    59  
    60  	memwipeU32(q[:])
    61  }
    62  
    63  func (r *roundB32) AES10(l *[blockSize]byte, src []byte, dst *[blockSize]byte) {
    64  	var q [8]uint32
    65  	xorBytes1x16(src, l[:], dst[:])
    66  
    67  	ct32.Load4xU32(&q, dst[:])
    68  	for i := 0; i < 3; i++ {
    69  		r.round(&q, r.skey[0:])  // I
    70  		r.round(&q, r.skey[8:])  // J
    71  		r.round(&q, r.skey[16:]) // L
    72  	}
    73  	r.round(&q, r.skey[0:]) // I
    74  	ct32.Store4xU32(dst[:], &q)
    75  
    76  	memwipeU32(q[:])
    77  }
    78  
// round performs one AES round on the bitsliced state behind q by calling
// the ct32 primitives in sequence: Sbox, ShiftRows, MixColumns and finally
// AddRoundKey with the round key words in k.
//
// The receiver is not used.  AES4, aes4x2 and AES10 call this with k set to
// a slice of r.skey that starts at the offset of the wanted round key.
func (r *roundB32) round(q *[8]uint32, k []uint32) {
	ct32.Sbox(q)
	ct32.ShiftRows(q)
	ct32.MixColumns(q)
	ct32.AddRoundKey(q, k) // the key is added last, after the three transforms
}
    85  
    86  func (r *roundB32) aezCorePass1(e *eState, in, out []byte, X *[blockSize]byte, sz int) {
    87  	var tmp0, tmp1, I [blockSize]byte
    88  
    89  	copy(I[:], e.I[1][:])
    90  	i := 1
    91  
    92  	// Process 4 * 16 bytes at a time in a loop.
    93  	for sz >= 4*blockSize {
    94  		r.aes4x2(&e.J[0], &I, &e.L[(i+0)%8], in[blockSize:], &tmp0,
    95  			&e.J[0], &I, &e.L[(i+1)%8], in[blockSize*3:], &tmp1) // E(1,i), E(1,i+1)
    96  		xorBytes1x16(in[:], tmp0[:], out[:])
    97  		xorBytes1x16(in[blockSize*2:], tmp1[:], out[blockSize*2:])
    98  
    99  		r.aes4x2(&zero, &e.I[0], &e.L[0], out[:], &tmp0,
   100  			&zero, &e.I[0], &e.L[0], out[blockSize*2:], &tmp1) // E(0,0), E(0,0)
   101  		xorBytes1x16(in[blockSize:], tmp0[:], out[blockSize:])
   102  		xorBytes1x16(in[blockSize*3:], tmp1[:], out[blockSize*3:])
   103  
   104  		xorBytes1x16(out[blockSize:], X[:], X[:])
   105  		xorBytes1x16(out[blockSize*3:], X[:], X[:])
   106  
   107  		sz -= 4 * blockSize
   108  		in, out = in[64:], out[64:]
   109  		if (i+1)%8 == 0 {
   110  			doubleBlock(&I)
   111  		}
   112  		i += 2
   113  	}
   114  	if sz > 0 {
   115  		r.AES4(&e.J[0], &I, &e.L[i%8], in[blockSize:], &tmp0) // E(1,i)
   116  		xorBytes1x16(in[:], tmp0[:], out[:])
   117  		r.AES4(&zero, &e.I[0], &e.L[0], out[:], &tmp0) // E(0,0)
   118  		xorBytes1x16(in[blockSize:], tmp0[:], out[blockSize:])
   119  		xorBytes1x16(out[blockSize:], X[:], X[:])
   120  	}
   121  
   122  	memwipe(tmp0[:])
   123  	memwipe(tmp1[:])
   124  	memwipe(I[:])
   125  }
   126  
   127  func (r *roundB32) aezCorePass2(e *eState, out []byte, Y, S *[blockSize]byte, sz int) {
   128  	var tmp0, tmp1, I [blockSize]byte
   129  
   130  	copy(I[:], e.I[1][:])
   131  	i := 1
   132  
   133  	// Process 4 * 16 bytes at a time in a loop.
   134  	for sz >= 4*blockSize {
   135  		r.aes4x2(&e.J[1], &I, &e.L[(i+0)%8], S[:], &tmp0,
   136  			&e.J[1], &I, &e.L[(i+1)%8], S[:], &tmp1) // E(2,i)
   137  		xorBytes1x16(out, tmp0[:], out[:])
   138  		xorBytes1x16(out[blockSize*2:], tmp1[:], out[blockSize*2:])
   139  		xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
   140  		xorBytes1x16(out[blockSize*3:], tmp1[:], out[blockSize*3:])
   141  		xorBytes1x16(out, Y[:], Y[:])
   142  		xorBytes1x16(out[blockSize*2:], Y[:], Y[:])
   143  
   144  		r.aes4x2(&zero, &e.I[0], &e.L[0], out[blockSize:], &tmp0,
   145  			&zero, &e.I[0], &e.L[0], out[blockSize*3:], &tmp1) // E(0,0)
   146  		xorBytes1x16(out, tmp0[:], out[:])
   147  		xorBytes1x16(out[blockSize*2:], tmp1[:], out[blockSize*2:])
   148  
   149  		r.aes4x2(&e.J[0], &I, &e.L[(i+0)%8], out[:], &tmp0,
   150  			&e.J[0], &I, &e.L[(i+1)%8], out[blockSize*2:], &tmp1) // E(1,i)
   151  		xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
   152  		xorBytes1x16(out[blockSize*3:], tmp1[:], out[blockSize*3:])
   153  
   154  		swapBlocks(&tmp0, out)
   155  		swapBlocks(&tmp0, out[blockSize*2:])
   156  
   157  		sz -= 4 * blockSize
   158  		out = out[64:]
   159  		if (i+1)%8 == 0 {
   160  			doubleBlock(&I)
   161  		}
   162  		i += 2
   163  	}
   164  	if sz > 0 {
   165  		r.AES4(&e.J[1], &I, &e.L[i%8], S[:], &tmp0) // E(2,i)
   166  		xorBytes1x16(out, tmp0[:], out[:])
   167  		xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
   168  		xorBytes1x16(out, Y[:], Y[:])
   169  
   170  		r.AES4(&zero, &e.I[0], &e.L[0], out[blockSize:], &tmp0) // E(0,0)
   171  		xorBytes1x16(out, tmp0[:], out[:])
   172  
   173  		r.AES4(&e.J[0], &I, &e.L[i%8], out[:], &tmp0) // E(1,i)
   174  		xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
   175  
   176  		swapBlocks(&tmp0, out)
   177  	}
   178  
   179  	memwipe(tmp0[:])
   180  	memwipe(tmp1[:])
   181  	memwipe(I[:])
   182  }
   183  
   184  func memwipeU32(b []uint32) {
   185  	for i := range b {
   186  		b[i] = 0
   187  	}
   188  }