github.com/mad-day/Yawning-crypto@v0.0.0-20190711051033-5a5f8cca32ec/aez/round_bitsliced32.go (about) 1 // round_bitsliced32.go - 32 bit Constant time AES round function. 2 // 3 // To the extent possible under law, Yawning Angel has waived all copyright 4 // and related or neighboring rights to aez, using the Creative 5 // Commons "CC0" public domain dedication. See LICENSE or 6 // <http://creativecommons.org/publicdomain/zero/1.0/> for full details. 7 8 package aez 9 10 import "github.com/mad-day/Yawning-crypto/bsaes/ct32" 11 12 type roundB32 struct { 13 skey [32]uint32 // I, J, L, 0 14 } 15 16 func newRoundB32(extractedKey *[extractedKeySize]byte) aesImpl { 17 r := new(roundB32) 18 for i := 0; i < 3; i++ { 19 ct32.RkeyOrtho(r.skey[i*8:], extractedKey[i*16:]) 20 } 21 22 return r 23 } 24 25 func (r *roundB32) Reset() { 26 memwipeU32(r.skey[:]) 27 } 28 29 func (r *roundB32) AES4(j, i, l *[blockSize]byte, src []byte, dst *[blockSize]byte) { 30 var q [8]uint32 31 xorBytes4x16(j[:], i[:], l[:], src, dst[:]) 32 33 ct32.Load4xU32(&q, dst[:]) 34 r.round(&q, r.skey[8:]) // J 35 r.round(&q, r.skey[0:]) // I 36 r.round(&q, r.skey[16:]) // L 37 r.round(&q, r.skey[24:]) // zero 38 ct32.Store4xU32(dst[:], &q) 39 40 memwipeU32(q[:]) 41 } 42 43 func (r *roundB32) aes4x2( 44 j0, i0, l0 *[blockSize]byte, src0 []byte, dst0 *[blockSize]byte, 45 j1, i1, l1 *[blockSize]byte, src1 []byte, dst1 *[blockSize]byte) { 46 // XXX/performance: Fairly sure i, src, and dst are the only things 47 // that are ever different here so XORs can be pruned. 48 49 var q [8]uint32 50 xorBytes4x16(j0[:], i0[:], l0[:], src0, dst0[:]) 51 xorBytes4x16(j1[:], i1[:], l1[:], src1, dst1[:]) 52 53 ct32.Load8xU32(&q, dst0[:], dst1[:]) 54 r.round(&q, r.skey[8:]) // J 55 r.round(&q, r.skey[0:]) // I 56 r.round(&q, r.skey[16:]) // L 57 r.round(&q, r.skey[24:]) // zero 58 ct32.Store8xU32(dst0[:], dst1[:], &q) 59 60 memwipeU32(q[:]) 61 } 62 63 func (r *roundB32) AES10(l *[blockSize]byte, src []byte, dst *[blockSize]byte) { 64 var q [8]uint32 65 xorBytes1x16(src, l[:], dst[:]) 66 67 ct32.Load4xU32(&q, dst[:]) 68 for i := 0; i < 3; i++ { 69 r.round(&q, r.skey[0:]) // I 70 r.round(&q, r.skey[8:]) // J 71 r.round(&q, r.skey[16:]) // L 72 } 73 r.round(&q, r.skey[0:]) // I 74 ct32.Store4xU32(dst[:], &q) 75 76 memwipeU32(q[:]) 77 } 78 79 func (r *roundB32) round(q *[8]uint32, k []uint32) { 80 ct32.Sbox(q) 81 ct32.ShiftRows(q) 82 ct32.MixColumns(q) 83 ct32.AddRoundKey(q, k) 84 } 85 86 func (r *roundB32) aezCorePass1(e *eState, in, out []byte, X *[blockSize]byte, sz int) { 87 var tmp0, tmp1, I [blockSize]byte 88 89 copy(I[:], e.I[1][:]) 90 i := 1 91 92 // Process 4 * 16 bytes at a time in a loop. 93 for sz >= 4*blockSize { 94 r.aes4x2(&e.J[0], &I, &e.L[(i+0)%8], in[blockSize:], &tmp0, 95 &e.J[0], &I, &e.L[(i+1)%8], in[blockSize*3:], &tmp1) // E(1,i), E(1,i+1) 96 xorBytes1x16(in[:], tmp0[:], out[:]) 97 xorBytes1x16(in[blockSize*2:], tmp1[:], out[blockSize*2:]) 98 99 r.aes4x2(&zero, &e.I[0], &e.L[0], out[:], &tmp0, 100 &zero, &e.I[0], &e.L[0], out[blockSize*2:], &tmp1) // E(0,0), E(0,0) 101 xorBytes1x16(in[blockSize:], tmp0[:], out[blockSize:]) 102 xorBytes1x16(in[blockSize*3:], tmp1[:], out[blockSize*3:]) 103 104 xorBytes1x16(out[blockSize:], X[:], X[:]) 105 xorBytes1x16(out[blockSize*3:], X[:], X[:]) 106 107 sz -= 4 * blockSize 108 in, out = in[64:], out[64:] 109 if (i+1)%8 == 0 { 110 doubleBlock(&I) 111 } 112 i += 2 113 } 114 if sz > 0 { 115 r.AES4(&e.J[0], &I, &e.L[i%8], in[blockSize:], &tmp0) // E(1,i) 116 xorBytes1x16(in[:], tmp0[:], out[:]) 117 r.AES4(&zero, &e.I[0], &e.L[0], out[:], &tmp0) // E(0,0) 118 xorBytes1x16(in[blockSize:], tmp0[:], out[blockSize:]) 119 xorBytes1x16(out[blockSize:], X[:], X[:]) 120 } 121 122 memwipe(tmp0[:]) 123 memwipe(tmp1[:]) 124 memwipe(I[:]) 125 } 126 127 func (r *roundB32) aezCorePass2(e *eState, out []byte, Y, S *[blockSize]byte, sz int) { 128 var tmp0, tmp1, I [blockSize]byte 129 130 copy(I[:], e.I[1][:]) 131 i := 1 132 133 // Process 4 * 16 bytes at a time in a loop. 134 for sz >= 4*blockSize { 135 r.aes4x2(&e.J[1], &I, &e.L[(i+0)%8], S[:], &tmp0, 136 &e.J[1], &I, &e.L[(i+1)%8], S[:], &tmp1) // E(2,i) 137 xorBytes1x16(out, tmp0[:], out[:]) 138 xorBytes1x16(out[blockSize*2:], tmp1[:], out[blockSize*2:]) 139 xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:]) 140 xorBytes1x16(out[blockSize*3:], tmp1[:], out[blockSize*3:]) 141 xorBytes1x16(out, Y[:], Y[:]) 142 xorBytes1x16(out[blockSize*2:], Y[:], Y[:]) 143 144 r.aes4x2(&zero, &e.I[0], &e.L[0], out[blockSize:], &tmp0, 145 &zero, &e.I[0], &e.L[0], out[blockSize*3:], &tmp1) // E(0,0) 146 xorBytes1x16(out, tmp0[:], out[:]) 147 xorBytes1x16(out[blockSize*2:], tmp1[:], out[blockSize*2:]) 148 149 r.aes4x2(&e.J[0], &I, &e.L[(i+0)%8], out[:], &tmp0, 150 &e.J[0], &I, &e.L[(i+1)%8], out[blockSize*2:], &tmp1) // E(1,i) 151 xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:]) 152 xorBytes1x16(out[blockSize*3:], tmp1[:], out[blockSize*3:]) 153 154 swapBlocks(&tmp0, out) 155 swapBlocks(&tmp0, out[blockSize*2:]) 156 157 sz -= 4 * blockSize 158 out = out[64:] 159 if (i+1)%8 == 0 { 160 doubleBlock(&I) 161 } 162 i += 2 163 } 164 if sz > 0 { 165 r.AES4(&e.J[1], &I, &e.L[i%8], S[:], &tmp0) // E(2,i) 166 xorBytes1x16(out, tmp0[:], out[:]) 167 xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:]) 168 xorBytes1x16(out, Y[:], Y[:]) 169 170 r.AES4(&zero, &e.I[0], &e.L[0], out[blockSize:], &tmp0) // E(0,0) 171 xorBytes1x16(out, tmp0[:], out[:]) 172 173 r.AES4(&e.J[0], &I, &e.L[i%8], out[:], &tmp0) // E(1,i) 174 xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:]) 175 176 swapBlocks(&tmp0, out) 177 } 178 179 memwipe(tmp0[:]) 180 memwipe(tmp1[:]) 181 memwipe(I[:]) 182 } 183 184 func memwipeU32(b []uint32) { 185 for i := range b { 186 b[i] = 0 187 } 188 }