// round_bitsliced64.go - 64bit constant time AES round function.
//
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to aez, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.

package aez

import "github.com/mad-day/Yawning-crypto/bsaes/ct64"

// roundB64 is the aesImpl backend built on the ct64 package.
//
// skey is treated as four consecutive 8-word round-key slots:
// I at [0:8], J at [8:16], L at [16:24], and a slot at [24:32] that is never
// written by newRoundB64 and therefore stays all-zero.
type roundB64 struct {
	skey [32]uint64 // I, J, L, 0
}

// newRoundB64 builds a roundB64 from extractedKey.
//
// Three consecutive 16-byte chunks of extractedKey (offsets 0, 16 and 32) are
// passed through ct64.RkeyOrtho into skey slots 0, 1 and 2.  The fourth slot
// is left at its zero value and is used as the "zero" round key.
func newRoundB64(extractedKey *[extractedKeySize]byte) aesImpl {
	r := new(roundB64)
	for i := 0; i < 3; i++ {
		ct64.RkeyOrtho(r.skey[i*8:], extractedKey[i*16:])
	}

	return r
}

// Reset zeroes every word of the stored round keys.
func (r *roundB64) Reset() {
	memwipeU64(r.skey[:])
}

// AES4 combines j, i, l and src into dst via xorBytes4x16 and then applies
// four rounds to dst in place, keyed with J, I, L and the zero key, in that
// order.
//
// NOTE(review): xorBytes4x16 is defined elsewhere; presumably it writes
// j ^ i ^ l ^ src into dst -- confirm against its definition.
func (r *roundB64) AES4(j, i, l *[blockSize]byte, src []byte, dst *[blockSize]byte) {
	var q [8]uint64
	xorBytes4x16(j[:], i[:], l[:], src, dst[:])

	// Move the block into the ct64 working state, run the rounds, and
	// write the result back over dst.
	ct64.Load4xU32(&q, dst[:])
	r.round(&q, r.skey[8:])  // J
	r.round(&q, r.skey[0:])  // I
	r.round(&q, r.skey[16:]) // L
	r.round(&q, r.skey[24:]) // zero
	ct64.Store4xU32(dst[:], &q)

	// Do not leave intermediate state behind in q.
	memwipeU64(q[:])
}

// aes4x4 performs the same computation as AES4 on four independent
// (j, i, l, src, dst) tuples at once: each tuple is combined with
// xorBytes4x16, all four dst blocks are loaded into a single ct64 state, the
// J, I, L, zero rounds are applied once to that state, and the results are
// stored back to dst0..dst3.
func (r *roundB64) aes4x4(
	j0, i0, l0 *[blockSize]byte, src0 []byte, dst0 *[blockSize]byte,
	j1, i1, l1 *[blockSize]byte, src1 []byte, dst1 *[blockSize]byte,
	j2, i2, l2 *[blockSize]byte, src2 []byte, dst2 *[blockSize]byte,
	j3, i3, l3 *[blockSize]byte, src3 []byte, dst3 *[blockSize]byte) {
	var q [8]uint64
	xorBytes4x16(j0[:], i0[:], l0[:], src0, dst0[:])
	xorBytes4x16(j1[:], i1[:], l1[:], src1, dst1[:])
	xorBytes4x16(j2[:], i2[:], l2[:], src2, dst2[:])
	xorBytes4x16(j3[:], i3[:], l3[:], src3, dst3[:])

	ct64.Load16xU32(&q, dst0[:], dst1[:], dst2[:], dst3[:])
	r.round(&q, r.skey[8:])  // J
	r.round(&q, r.skey[0:])  // I
	r.round(&q, r.skey[16:]) // L
	r.round(&q, r.skey[24:]) // zero
	ct64.Store16xU32(dst0[:], dst1[:], dst2[:], dst3[:], &q)

	// Do not leave intermediate state behind in q.
	memwipeU64(q[:])
}

// AES10 combines src and l into dst via xorBytes1x16 and then applies ten
// rounds to dst in place: three repetitions of the key sequence I, J, L
// followed by one final round keyed with I.
//
// NOTE(review): xorBytes1x16 is defined elsewhere; presumably it writes
// src ^ l into dst -- confirm against its definition.
func (r *roundB64) AES10(l *[blockSize]byte, src []byte, dst *[blockSize]byte) {
	var q [8]uint64
	xorBytes1x16(src, l[:], dst[:])

	ct64.Load4xU32(&q, dst[:])
	for i := 0; i < 3; i++ {
		r.round(&q, r.skey[0:])  // I
		r.round(&q, r.skey[8:])  // J
		r.round(&q, r.skey[16:]) // L
	}
	r.round(&q, r.skey[0:]) // I
	ct64.Store4xU32(dst[:], &q)

	// Do not leave intermediate state behind in q.
	memwipeU64(q[:])
}

// round applies one round to the ct64 state q: Sbox, ShiftRows, MixColumns,
// then AddRoundKey with the round key k.
func (r *roundB64) round(q *[8]uint64, k []uint64) {
	ct64.Sbox(q)
	ct64.ShiftRows(q)
	ct64.MixColumns(q)
	ct64.AddRoundKey(q, k)
}

// aezCorePass1 walks sz bytes of in as pairs of blocks, writes the processed
// pairs to out, and folds the second output block of every pair into X.
//
// For the pair with running index i (starting at 1), with blocks (a, b) at
// in[0:] and in[blockSize:], the steps are:
//
//	t   = AES4(e.J[0], I, e.L[i%8], b)     // E(1,i)
//	a'  = xorBytes1x16(a, t)               -> out[0:]
//	t   = AES4(zero, e.I[0], e.L[0], a')   // E(0,0)
//	b'  = xorBytes1x16(b, t)               -> out[blockSize:]
//	X   = xorBytes1x16(b', X)
//
// I starts as a copy of e.I[1] and is passed through doubleBlock each time
// the index that was just processed is a multiple of 8.
//
// NOTE(review): sz is presumably a multiple of 2*blockSize and no larger than
// len(in)/len(out); nothing here checks that -- confirm against the caller.
func (r *roundB64) aezCorePass1(e *eState, in, out []byte, X *[blockSize]byte, sz int) {
	var tmp0, tmp1, tmp2, tmp3, I [blockSize]byte

	copy(I[:], e.I[1][:])
	i := 1

	// Process 8 * 16 bytes at a time in a loop.
	//
	// Each pass handles four block pairs (indices i .. i+3).  mult toggles
	// every pass, so I is doubled after every second pass, i.e. once eight
	// indices have been consumed.
	for mult := false; sz >= 8*blockSize; mult = !mult {
		r.aes4x4(&e.J[0], &I, &e.L[(i+0)%8], in[blockSize:], &tmp0,
			&e.J[0], &I, &e.L[(i+1)%8], in[blockSize*3:], &tmp1,
			&e.J[0], &I, &e.L[(i+2)%8], in[blockSize*5:], &tmp2,
			&e.J[0], &I, &e.L[(i+3)%8], in[blockSize*7:], &tmp3) // E(1,i) ... E(1,i+3)
		xorBytes1x16(in[:], tmp0[:], out[:])
		xorBytes1x16(in[blockSize*2:], tmp1[:], out[blockSize*2:])
		xorBytes1x16(in[blockSize*4:], tmp2[:], out[blockSize*4:])
		xorBytes1x16(in[blockSize*6:], tmp3[:], out[blockSize*6:])

		r.aes4x4(&zero, &e.I[0], &e.L[0], out[:], &tmp0,
			&zero, &e.I[0], &e.L[0], out[blockSize*2:], &tmp1,
			&zero, &e.I[0], &e.L[0], out[blockSize*4:], &tmp2,
			&zero, &e.I[0], &e.L[0], out[blockSize*6:], &tmp3) // E(0,0) x4
		xorBytes1x16(in[blockSize:], tmp0[:], out[blockSize:])
		xorBytes1x16(in[blockSize*3:], tmp1[:], out[blockSize*3:])
		xorBytes1x16(in[blockSize*5:], tmp2[:], out[blockSize*5:])
		xorBytes1x16(in[blockSize*7:], tmp3[:], out[blockSize*7:])

		// Accumulate the second output block of each pair into X.
		xorBytes1x16(out[blockSize:], X[:], X[:])
		xorBytes1x16(out[blockSize*3:], X[:], X[:])
		xorBytes1x16(out[blockSize*5:], X[:], X[:])
		xorBytes1x16(out[blockSize*7:], X[:], X[:])

		sz -= 8 * blockSize
		// Advance past the 8 blocks just handled (128 bytes; this
		// literal presumably equals 8*blockSize -- confirm).
		in, out = in[128:], out[128:]
		if mult { // Multiply every other pass.
			doubleBlock(&I)
		}
		i += 4
	}

	// XXX/performance: 4 * 16 bytes at a time.

	// Handle whatever is left one block pair at a time.
	for sz > 0 {
		r.AES4(&e.J[0], &I, &e.L[i%8], in[blockSize:], &tmp0) // E(1,i)
		xorBytes1x16(in[:], tmp0[:], out[:])
		r.AES4(&zero, &e.I[0], &e.L[0], out[:], &tmp0) // E(0,0)
		xorBytes1x16(in[blockSize:], tmp0[:], out[blockSize:])
		xorBytes1x16(out[blockSize:], X[:], X[:])

		sz -= 2 * blockSize
		// Advance past the pair just handled (32 bytes; presumably
		// 2*blockSize -- confirm).
		in, out = in[32:], out[32:]
		if i%8 == 0 {
			doubleBlock(&I)
		}
		i++
	}

	// Scrub the temporaries and the local copy of I before returning.
	memwipe(tmp0[:])
	memwipe(tmp1[:])
	memwipe(tmp2[:])
	memwipe(tmp3[:])
	memwipe(I[:])
}

// aezCorePass2 walks sz bytes of out as pairs of blocks, transforming them in
// place, and folds an intermediate value of the first block of every pair
// into Y.
//
// For the pair with running index i (starting at 1), with blocks (a, b) at
// out[0:] and out[blockSize:], the steps are:
//
//	t  = AES4(e.J[1], I, e.L[i%8], S)      // E(2,i)
//	a  = xorBytes1x16(a, t)
//	b  = xorBytes1x16(b, t)
//	Y  = xorBytes1x16(a, Y)
//	t  = AES4(zero, e.I[0], e.L[0], b)     // E(0,0)
//	a  = xorBytes1x16(a, t)
//	t  = AES4(e.J[0], I, e.L[i%8], a)      // E(1,i)
//	b  = xorBytes1x16(b, t)
//	swapBlocks(&tmp0, pair)
//
// I starts as a copy of e.I[1] and is passed through doubleBlock each time
// the index that was just processed is a multiple of 8.
//
// NOTE(review): swapBlocks is defined elsewhere; presumably it exchanges the
// two blocks at the start of its slice argument using tmp0 as scratch --
// confirm.  sz is presumably a multiple of 2*blockSize; nothing here checks
// that.
func (r *roundB64) aezCorePass2(e *eState, out []byte, Y, S *[blockSize]byte, sz int) {
	var tmp0, tmp1, tmp2, tmp3, I [blockSize]byte

	copy(I[:], e.I[1][:])
	i := 1

	// Process 8 * 16 bytes at a time in a loop.
	//
	// Each pass handles four block pairs (indices i .. i+3).  mult toggles
	// every pass, so I is doubled after every second pass, i.e. once eight
	// indices have been consumed.
	for mult := false; sz >= 8*blockSize; mult = !mult {
		r.aes4x4(&e.J[1], &I, &e.L[(i+0)%8], S[:], &tmp0,
			&e.J[1], &I, &e.L[(i+1)%8], S[:], &tmp1,
			&e.J[1], &I, &e.L[(i+2)%8], S[:], &tmp2,
			&e.J[1], &I, &e.L[(i+3)%8], S[:], &tmp3) // E(2,i) .. E(2,i+3)
		// Apply each E(2,*) output to both blocks of its pair.
		xorBytes1x16(out, tmp0[:], out[:])
		xorBytes1x16(out[blockSize*2:], tmp1[:], out[blockSize*2:])
		xorBytes1x16(out[blockSize*4:], tmp2[:], out[blockSize*4:])
		xorBytes1x16(out[blockSize*6:], tmp3[:], out[blockSize*6:])
		xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
		xorBytes1x16(out[blockSize*3:], tmp1[:], out[blockSize*3:])
		xorBytes1x16(out[blockSize*5:], tmp2[:], out[blockSize*5:])
		xorBytes1x16(out[blockSize*7:], tmp3[:], out[blockSize*7:])
		// Accumulate the first block of each pair into Y.
		xorBytes1x16(out, Y[:], Y[:])
		xorBytes1x16(out[blockSize*2:], Y[:], Y[:])
		xorBytes1x16(out[blockSize*4:], Y[:], Y[:])
		xorBytes1x16(out[blockSize*6:], Y[:], Y[:])

		r.aes4x4(&zero, &e.I[0], &e.L[0], out[blockSize:], &tmp0,
			&zero, &e.I[0], &e.L[0], out[blockSize*3:], &tmp1,
			&zero, &e.I[0], &e.L[0], out[blockSize*5:], &tmp2,
			&zero, &e.I[0], &e.L[0], out[blockSize*7:], &tmp3) // E(0,0)x4
		xorBytes1x16(out, tmp0[:], out[:])
		xorBytes1x16(out[blockSize*2:], tmp1[:], out[blockSize*2:])
		xorBytes1x16(out[blockSize*4:], tmp2[:], out[blockSize*4:])
		xorBytes1x16(out[blockSize*6:], tmp3[:], out[blockSize*6:])

		r.aes4x4(&e.J[0], &I, &e.L[(i+0)%8], out[:], &tmp0,
			&e.J[0], &I, &e.L[(i+1)%8], out[blockSize*2:], &tmp1,
			&e.J[0], &I, &e.L[(i+2)%8], out[blockSize*4:], &tmp2,
			&e.J[0], &I, &e.L[(i+3)%8], out[blockSize*6:], &tmp3) // E(1,i) ... E(1,i+3)
		xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
		xorBytes1x16(out[blockSize*3:], tmp1[:], out[blockSize*3:])
		xorBytes1x16(out[blockSize*5:], tmp2[:], out[blockSize*5:])
		xorBytes1x16(out[blockSize*7:], tmp3[:], out[blockSize*7:])

		// tmp0 is passed for all four pairs; its previous contents are
		// no longer needed at this point.
		swapBlocks(&tmp0, out)
		swapBlocks(&tmp0, out[blockSize*2:])
		swapBlocks(&tmp0, out[blockSize*4:])
		swapBlocks(&tmp0, out[blockSize*6:])

		sz -= 8 * blockSize
		// Advance past the 8 blocks just handled (128 bytes; this
		// literal presumably equals 8*blockSize -- confirm).
		out = out[128:]
		if mult { // Multiply every other pass.
			doubleBlock(&I)
		}
		i += 4
	}

	// XXX/performance: 4 * 16 bytes at a time.

	// Handle whatever is left one block pair at a time.
	for sz > 0 {
		r.AES4(&e.J[1], &I, &e.L[i%8], S[:], &tmp0) // E(2,i)
		xorBytes1x16(out, tmp0[:], out[:])
		xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
		xorBytes1x16(out, Y[:], Y[:])

		r.AES4(&zero, &e.I[0], &e.L[0], out[blockSize:], &tmp0) // E(0,0)
		xorBytes1x16(out, tmp0[:], out[:])

		r.AES4(&e.J[0], &I, &e.L[i%8], out[:], &tmp0) // E(1,i)
		xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])

		swapBlocks(&tmp0, out)

		sz -= 2 * blockSize
		// Advance past the pair just handled (32 bytes; presumably
		// 2*blockSize -- confirm).
		out = out[32:]
		if i%8 == 0 {
			doubleBlock(&I)
		}
		i++
	}

	// Scrub the temporaries and the local copy of I before returning.
	memwipe(tmp0[:])
	memwipe(tmp1[:])
	memwipe(tmp2[:])
	memwipe(tmp3[:])
	memwipe(I[:])
}

// memwipeU64 sets every element of s to zero.
func memwipeU64(s []uint64) {
	for i := range s {
		s[i] = 0
	}
}