github.com/mad-day/Yawning-crypto@v0.0.0-20190711051033-5a5f8cca32ec/bsaes/ct32/aes_ct32.go (about) 1 // Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> 2 // Copyright (c) 2017 Yawning Angel <yawning at schwanenlied dot me> 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining 5 // a copy of this software and associated documentation files (the 6 // "Software"), to deal in the Software without restriction, including 7 // without limitation the rights to use, copy, modify, merge, publish, 8 // distribute, sublicense, and/or sell copies of the Software, and to 9 // permit persons to whom the Software is furnished to do so, subject to 10 // the following conditions: 11 // 12 // The above copyright notice and this permission notice shall be 13 // included in all copies or substantial portions of the Software. 14 // 15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 // SOFTWARE. 23 24 // Package ct32 is a 32 bit optimized AES implementation that processes 2 25 // blocks at a time. 26 package ct32 27 28 import ( 29 "crypto/cipher" 30 "encoding/binary" 31 32 "github.com/mad-day/Yawning-crypto/bsaes/internal/modes" 33 ) 34 35 var rcon = [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36} 36 37 func Sbox(q *[8]uint32) { 38 // This S-box implementation is a straightforward translation of 39 // the circuit described by Boyar and Peralta in "A new 40 // combinational logic minimization technique with applications 41 // to cryptology" (https://eprint.iacr.org/2009/191.pdf). 42 // 43 // Note that variables x* (input) and s* (output) are numbered 44 // in "reverse" order (x0 is the high bit, x7 is the low bit). 45 46 var ( 47 x0, x1, x2, x3, x4, x5, x6, x7 uint32 48 y1, y2, y3, y4, y5, y6, y7, y8, y9 uint32 49 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19 uint32 50 y20, y21 uint32 51 z0, z1, z2, z3, z4, z5, z6, z7, z8, z9 uint32 52 z10, z11, z12, z13, z14, z15, z16, z17 uint32 53 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 uint32 54 t10, t11, t12, t13, t14, t15, t16, t17, t18, t19 uint32 55 t20, t21, t22, t23, t24, t25, t26, t27, t28, t29 uint32 56 t30, t31, t32, t33, t34, t35, t36, t37, t38, t39 uint32 57 t40, t41, t42, t43, t44, t45, t46, t47, t48, t49 uint32 58 t50, t51, t52, t53, t54, t55, t56, t57, t58, t59 uint32 59 t60, t61, t62, t63, t64, t65, t66, t67 uint32 60 s0, s1, s2, s3, s4, s5, s6, s7 uint32 61 ) 62 63 x0 = q[7] 64 x1 = q[6] 65 x2 = q[5] 66 x3 = q[4] 67 x4 = q[3] 68 x5 = q[2] 69 x6 = q[1] 70 x7 = q[0] 71 72 // 73 // Top linear transformation. 74 // 75 y14 = x3 ^ x5 76 y13 = x0 ^ x6 77 y9 = x0 ^ x3 78 y8 = x0 ^ x5 79 t0 = x1 ^ x2 80 y1 = t0 ^ x7 81 y4 = y1 ^ x3 82 y12 = y13 ^ y14 83 y2 = y1 ^ x0 84 y5 = y1 ^ x6 85 y3 = y5 ^ y8 86 t1 = x4 ^ y12 87 y15 = t1 ^ x5 88 y20 = t1 ^ x1 89 y6 = y15 ^ x7 90 y10 = y15 ^ t0 91 y11 = y20 ^ y9 92 y7 = x7 ^ y11 93 y17 = y10 ^ y11 94 y19 = y10 ^ y8 95 y16 = t0 ^ y11 96 y21 = y13 ^ y16 97 y18 = x0 ^ y16 98 99 // 100 // Non-linear section. 101 // 102 t2 = y12 & y15 103 t3 = y3 & y6 104 t4 = t3 ^ t2 105 t5 = y4 & x7 106 t6 = t5 ^ t2 107 t7 = y13 & y16 108 t8 = y5 & y1 109 t9 = t8 ^ t7 110 t10 = y2 & y7 111 t11 = t10 ^ t7 112 t12 = y9 & y11 113 t13 = y14 & y17 114 t14 = t13 ^ t12 115 t15 = y8 & y10 116 t16 = t15 ^ t12 117 t17 = t4 ^ t14 118 t18 = t6 ^ t16 119 t19 = t9 ^ t14 120 t20 = t11 ^ t16 121 t21 = t17 ^ y20 122 t22 = t18 ^ y19 123 t23 = t19 ^ y21 124 t24 = t20 ^ y18 125 126 t25 = t21 ^ t22 127 t26 = t21 & t23 128 t27 = t24 ^ t26 129 t28 = t25 & t27 130 t29 = t28 ^ t22 131 t30 = t23 ^ t24 132 t31 = t22 ^ t26 133 t32 = t31 & t30 134 t33 = t32 ^ t24 135 t34 = t23 ^ t33 136 t35 = t27 ^ t33 137 t36 = t24 & t35 138 t37 = t36 ^ t34 139 t38 = t27 ^ t36 140 t39 = t29 & t38 141 t40 = t25 ^ t39 142 143 t41 = t40 ^ t37 144 t42 = t29 ^ t33 145 t43 = t29 ^ t40 146 t44 = t33 ^ t37 147 t45 = t42 ^ t41 148 z0 = t44 & y15 149 z1 = t37 & y6 150 z2 = t33 & x7 151 z3 = t43 & y16 152 z4 = t40 & y1 153 z5 = t29 & y7 154 z6 = t42 & y11 155 z7 = t45 & y17 156 z8 = t41 & y10 157 z9 = t44 & y12 158 z10 = t37 & y3 159 z11 = t33 & y4 160 z12 = t43 & y13 161 z13 = t40 & y5 162 z14 = t29 & y2 163 z15 = t42 & y9 164 z16 = t45 & y14 165 z17 = t41 & y8 166 167 // 168 // Bottom linear transformation. 169 // 170 t46 = z15 ^ z16 171 t47 = z10 ^ z11 172 t48 = z5 ^ z13 173 t49 = z9 ^ z10 174 t50 = z2 ^ z12 175 t51 = z2 ^ z5 176 t52 = z7 ^ z8 177 t53 = z0 ^ z3 178 t54 = z6 ^ z7 179 t55 = z16 ^ z17 180 t56 = z12 ^ t48 181 t57 = t50 ^ t53 182 t58 = z4 ^ t46 183 t59 = z3 ^ t54 184 t60 = t46 ^ t57 185 t61 = z14 ^ t57 186 t62 = t52 ^ t58 187 t63 = t49 ^ t58 188 t64 = z4 ^ t59 189 t65 = t61 ^ t62 190 t66 = z1 ^ t63 191 s0 = t59 ^ t63 192 s6 = t56 ^ (^t62) 193 s7 = t48 ^ (^t60) 194 t67 = t64 ^ t65 195 s3 = t53 ^ t66 196 s4 = t51 ^ t66 197 s5 = t47 ^ t65 198 s1 = t64 ^ (^s3) 199 s2 = t55 ^ (^t67) 200 201 q[7] = s0 202 q[6] = s1 203 q[5] = s2 204 q[4] = s3 205 q[3] = s4 206 q[2] = s5 207 q[1] = s6 208 q[0] = s7 209 } 210 211 func Ortho(q []uint32) { 212 _ = q[7] // Early bounds check. 213 214 const cl2, ch2 = 0x55555555, 0xAAAAAAAA 215 q[0], q[1] = (q[0]&cl2)|((q[1]&cl2)<<1), ((q[0]&ch2)>>1)|(q[1]&ch2) 216 q[2], q[3] = (q[2]&cl2)|((q[3]&cl2)<<1), ((q[2]&ch2)>>1)|(q[3]&ch2) 217 q[4], q[5] = (q[4]&cl2)|((q[5]&cl2)<<1), ((q[4]&ch2)>>1)|(q[5]&ch2) 218 q[6], q[7] = (q[6]&cl2)|((q[7]&cl2)<<1), ((q[6]&ch2)>>1)|(q[7]&ch2) 219 220 const cl4, ch4 = 0x33333333, 0xCCCCCCCC 221 q[0], q[2] = (q[0]&cl4)|((q[2]&cl4)<<2), ((q[0]&ch4)>>2)|(q[2]&ch4) 222 q[1], q[3] = (q[1]&cl4)|((q[3]&cl4)<<2), ((q[1]&ch4)>>2)|(q[3]&ch4) 223 q[4], q[6] = (q[4]&cl4)|((q[6]&cl4)<<2), ((q[4]&ch4)>>2)|(q[6]&ch4) 224 q[5], q[7] = (q[5]&cl4)|((q[7]&cl4)<<2), ((q[5]&ch4)>>2)|(q[7]&ch4) 225 226 const cl8, ch8 = 0x0F0F0F0F, 0xF0F0F0F0 227 q[0], q[4] = (q[0]&cl8)|((q[4]&cl8)<<4), ((q[0]&ch8)>>4)|(q[4]&ch8) 228 q[1], q[5] = (q[1]&cl8)|((q[5]&cl8)<<4), ((q[1]&ch8)>>4)|(q[5]&ch8) 229 q[2], q[6] = (q[2]&cl8)|((q[6]&cl8)<<4), ((q[2]&ch8)>>4)|(q[6]&ch8) 230 q[3], q[7] = (q[3]&cl8)|((q[7]&cl8)<<4), ((q[3]&ch8)>>4)|(q[7]&ch8) 231 } 232 233 func AddRoundKey(q *[8]uint32, sk []uint32) { 234 _ = sk[7] // Early bounds check. 235 236 q[0] ^= sk[0] 237 q[1] ^= sk[1] 238 q[2] ^= sk[2] 239 q[3] ^= sk[3] 240 q[4] ^= sk[4] 241 q[5] ^= sk[5] 242 q[6] ^= sk[6] 243 q[7] ^= sk[7] 244 } 245 246 func subWord(x uint32) uint32 { 247 var q [8]uint32 248 249 for i := range q { 250 q[i] = x 251 } 252 Ortho(q[:]) 253 Sbox(&q) 254 Ortho(q[:]) 255 x = q[0] 256 memwipeU32(q[:]) 257 return x 258 } 259 260 func Keysched(compSkey []uint32, key []byte) int { 261 numRounds := 0 262 keyLen := len(key) 263 switch keyLen { 264 case 16: 265 numRounds = 10 266 case 24: 267 numRounds = 12 268 case 32: 269 numRounds = 14 270 default: 271 panic("aes/impl32: Keysched: invalid key length") 272 } 273 274 var skey [120]uint32 275 var tmp uint32 276 nk := keyLen >> 2 277 nkf := (numRounds + 1) << 2 278 for i := 0; i < nk; i++ { 279 tmp = binary.LittleEndian.Uint32(key[i<<2:]) 280 skey[(i<<1)+0] = tmp 281 skey[(i<<1)+1] = tmp 282 } 283 for i, j, k := nk, 0, 0; i < nkf; i++ { 284 if j == 0 { 285 tmp = (tmp << 24) | (tmp >> 8) 286 tmp = subWord(tmp) ^ uint32(rcon[k]) 287 } else if nk > 6 && j == 4 { 288 tmp = subWord(tmp) 289 } 290 tmp ^= skey[(i-nk)<<1] 291 skey[(i<<1)+0] = tmp 292 skey[(i<<1)+1] = tmp 293 if j++; j == nk { 294 j = 0 295 k++ 296 } 297 } 298 for i := 0; i < nkf; i += 4 { 299 Ortho(skey[i<<1:]) 300 } 301 for i, j := 0, 0; i < nkf; i, j = i+1, j+2 { 302 compSkey[i] = (skey[j+0] & 0x55555555) | (skey[j+1] & 0xAAAAAAAA) 303 } 304 305 memwipeU32(skey[:]) 306 307 return numRounds 308 } 309 310 func SkeyExpand(skey []uint32, numRounds int, compSkey []uint32) { 311 n := (numRounds + 1) << 2 312 for u, v := 0, 0; u < n; u, v = u+1, v+2 { 313 x := compSkey[u] 314 y := compSkey[u] 315 316 x &= 0x55555555 317 skey[v+0] = x | (x << 1) 318 y &= 0xAAAAAAAA 319 skey[v+1] = y | (y >> 1) 320 } 321 } 322 323 func RkeyOrtho(q []uint32, key []byte) { 324 for i := 0; i < 4; i++ { 325 x := binary.LittleEndian.Uint32(key[i<<2:]) 326 q[(i<<1)+0] = x 327 q[(i<<1)+1] = x 328 } 329 Ortho(q[:]) 330 for i, j := 0, 0; i < 4; i, j = i+1, j+2 { 331 x := (q[j+0] & 0x55555555) | (q[j+1] & 0xAAAAAAAA) 332 y := x 333 334 x &= 0x55555555 335 q[j+0] = x | (x << 1) 336 y &= 0xAAAAAAAA 337 q[j+1] = y | (y >> 1) 338 } 339 } 340 341 func Load4xU32(q *[8]uint32, src []byte) { 342 q[0] = binary.LittleEndian.Uint32(src[:]) 343 q[2] = binary.LittleEndian.Uint32(src[4:]) 344 q[4] = binary.LittleEndian.Uint32(src[8:]) 345 q[6] = binary.LittleEndian.Uint32(src[12:]) 346 q[1] = 0 347 q[3] = 0 348 q[5] = 0 349 q[7] = 0 350 Ortho(q[:]) 351 } 352 353 func Load8xU32(q *[8]uint32, src0, src1 []byte) { 354 src := [][]byte{src0, src1} 355 for i, s := range src { 356 q[i] = binary.LittleEndian.Uint32(s[:]) 357 q[i+2] = binary.LittleEndian.Uint32(s[4:]) 358 q[i+4] = binary.LittleEndian.Uint32(s[8:]) 359 q[i+6] = binary.LittleEndian.Uint32(s[12:]) 360 } 361 Ortho(q[:]) 362 } 363 364 func Store4xU32(dst []byte, q *[8]uint32) { 365 Ortho(q[:]) 366 binary.LittleEndian.PutUint32(dst[:], q[0]) 367 binary.LittleEndian.PutUint32(dst[4:], q[2]) 368 binary.LittleEndian.PutUint32(dst[8:], q[4]) 369 binary.LittleEndian.PutUint32(dst[12:], q[6]) 370 } 371 372 func Store8xU32(dst0, dst1 []byte, q *[8]uint32) { 373 Ortho(q[:]) 374 dst := [][]byte{dst0, dst1} 375 for i, d := range dst { 376 binary.LittleEndian.PutUint32(d[:], q[i]) 377 binary.LittleEndian.PutUint32(d[4:], q[i+2]) 378 binary.LittleEndian.PutUint32(d[8:], q[i+4]) 379 binary.LittleEndian.PutUint32(d[12:], q[i+6]) 380 } 381 } 382 383 func rotr16(x uint32) uint32 { 384 return (x << 16) | (x >> 16) 385 } 386 387 func memwipeU32(s []uint32) { 388 for i := range s { 389 s[i] = 0 390 } 391 } 392 393 type block struct { 394 modes.BlockModesImpl 395 396 skExp [120]uint32 397 numRounds int 398 wasReset bool 399 } 400 401 func (b *block) BlockSize() int { 402 return 16 403 } 404 405 func (b *block) Stride() int { 406 return 2 407 } 408 409 func (b *block) Encrypt(dst, src []byte) { 410 var q [8]uint32 411 412 if b.wasReset { 413 panic("bsaes/ct32: Encrypt() called after Reset()") 414 } 415 416 Load4xU32(&q, src) 417 encrypt(b.numRounds, b.skExp[:], &q) 418 Store4xU32(dst, &q) 419 } 420 421 func (b *block) Decrypt(dst, src []byte) { 422 var q [8]uint32 423 424 if b.wasReset { 425 panic("bsaes/ct32: Decrypt() called after Reset()") 426 } 427 428 Load4xU32(&q, src) 429 decrypt(b.numRounds, b.skExp[:], &q) 430 Store4xU32(dst, &q) 431 } 432 433 func (b *block) BulkEncrypt(dst, src []byte) { 434 var q [8]uint32 435 436 if b.wasReset { 437 panic("bsaes/ct32: BulkEncrypt() called after Reset()") 438 } 439 440 Load8xU32(&q, src[0:], src[16:]) 441 encrypt(b.numRounds, b.skExp[:], &q) 442 Store8xU32(dst[0:], dst[16:], &q) 443 } 444 445 func (b *block) BulkDecrypt(dst, src []byte) { 446 var q [8]uint32 447 448 if b.wasReset { 449 panic("bsaes/ct32: BulkDecrypt() called after Reset()") 450 } 451 452 Load8xU32(&q, src[0:], src[16:]) 453 decrypt(b.numRounds, b.skExp[:], &q) 454 Store8xU32(dst[0:], dst[16:], &q) 455 } 456 457 func (b *block) Reset() { 458 if !b.wasReset { 459 b.wasReset = true 460 memwipeU32(b.skExp[:]) 461 } 462 } 463 464 // NewCipher creates and returns a new cipher.Block, backed by a Impl32. 465 func NewCipher(key []byte) cipher.Block { 466 var skey [60]uint32 467 defer memwipeU32(skey[:]) 468 469 b := new(block) 470 b.numRounds = Keysched(skey[:], key) 471 SkeyExpand(b.skExp[:], b.numRounds, skey[:]) 472 473 b.BlockModesImpl.Init(b) 474 475 return b 476 }