github.com/mad-day/Yawning-crypto@v0.0.0-20190711051033-5a5f8cca32ec/bsaes/ct64/aes_ct64.go (about) 1 // Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> 2 // Copyright (c) 2017 Yawning Angel <yawning at schwanenlied dot me> 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining 5 // a copy of this software and associated documentation files (the 6 // "Software"), to deal in the Software without restriction, including 7 // without limitation the rights to use, copy, modify, merge, publish, 8 // distribute, sublicense, and/or sell copies of the Software, and to 9 // permit persons to whom the Software is furnished to do so, subject to 10 // the following conditions: 11 // 12 // The above copyright notice and this permission notice shall be 13 // included in all copies or substantial portions of the Software. 14 // 15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 // SOFTWARE. 23 24 // Package ct64 is a 64 bit optimized AES implementation that processes 4 25 // blocks at a time. 26 package ct64 27 28 import ( 29 "crypto/cipher" 30 "encoding/binary" 31 32 "github.com/mad-day/Yawning-crypto/bsaes/internal/modes" 33 ) 34 35 var rcon = [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36} 36 37 func Sbox(q *[8]uint64) { 38 // This S-box implementation is a straightforward translation of 39 // the circuit described by Boyar and Peralta in "A new 40 // combinational logic minimization technique with applications 41 // to cryptology" (https://eprint.iacr.org/2009/191.pdf). 42 // 43 // Note that variables x* (input) and s* (output) are numbered 44 // in "reverse" order (x0 is the high bit, x7 is the low bit). 45 46 var x0, x1, x2, x3, x4, x5, x6, x7 uint64 47 var y1, y2, y3, y4, y5, y6, y7, y8, y9 uint64 48 var y10, y11, y12, y13, y14, y15, y16, y17, y18, y19 uint64 49 var y20, y21 uint64 50 var z0, z1, z2, z3, z4, z5, z6, z7, z8, z9 uint64 51 var z10, z11, z12, z13, z14, z15, z16, z17 uint64 52 var t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 uint64 53 var t10, t11, t12, t13, t14, t15, t16, t17, t18, t19 uint64 54 var t20, t21, t22, t23, t24, t25, t26, t27, t28, t29 uint64 55 var t30, t31, t32, t33, t34, t35, t36, t37, t38, t39 uint64 56 var t40, t41, t42, t43, t44, t45, t46, t47, t48, t49 uint64 57 var t50, t51, t52, t53, t54, t55, t56, t57, t58, t59 uint64 58 var t60, t61, t62, t63, t64, t65, t66, t67 uint64 59 var s0, s1, s2, s3, s4, s5, s6, s7 uint64 60 61 x0 = q[7] 62 x1 = q[6] 63 x2 = q[5] 64 x3 = q[4] 65 x4 = q[3] 66 x5 = q[2] 67 x6 = q[1] 68 x7 = q[0] 69 70 // 71 // Top linear transformation. 72 // 73 y14 = x3 ^ x5 74 y13 = x0 ^ x6 75 y9 = x0 ^ x3 76 y8 = x0 ^ x5 77 t0 = x1 ^ x2 78 y1 = t0 ^ x7 79 y4 = y1 ^ x3 80 y12 = y13 ^ y14 81 y2 = y1 ^ x0 82 y5 = y1 ^ x6 83 y3 = y5 ^ y8 84 t1 = x4 ^ y12 85 y15 = t1 ^ x5 86 y20 = t1 ^ x1 87 y6 = y15 ^ x7 88 y10 = y15 ^ t0 89 y11 = y20 ^ y9 90 y7 = x7 ^ y11 91 y17 = y10 ^ y11 92 y19 = y10 ^ y8 93 y16 = t0 ^ y11 94 y21 = y13 ^ y16 95 y18 = x0 ^ y16 96 97 // 98 // Non-linear section. 99 // 100 t2 = y12 & y15 101 t3 = y3 & y6 102 t4 = t3 ^ t2 103 t5 = y4 & x7 104 t6 = t5 ^ t2 105 t7 = y13 & y16 106 t8 = y5 & y1 107 t9 = t8 ^ t7 108 t10 = y2 & y7 109 t11 = t10 ^ t7 110 t12 = y9 & y11 111 t13 = y14 & y17 112 t14 = t13 ^ t12 113 t15 = y8 & y10 114 t16 = t15 ^ t12 115 t17 = t4 ^ t14 116 t18 = t6 ^ t16 117 t19 = t9 ^ t14 118 t20 = t11 ^ t16 119 t21 = t17 ^ y20 120 t22 = t18 ^ y19 121 t23 = t19 ^ y21 122 t24 = t20 ^ y18 123 124 t25 = t21 ^ t22 125 t26 = t21 & t23 126 t27 = t24 ^ t26 127 t28 = t25 & t27 128 t29 = t28 ^ t22 129 t30 = t23 ^ t24 130 t31 = t22 ^ t26 131 t32 = t31 & t30 132 t33 = t32 ^ t24 133 t34 = t23 ^ t33 134 t35 = t27 ^ t33 135 t36 = t24 & t35 136 t37 = t36 ^ t34 137 t38 = t27 ^ t36 138 t39 = t29 & t38 139 t40 = t25 ^ t39 140 141 t41 = t40 ^ t37 142 t42 = t29 ^ t33 143 t43 = t29 ^ t40 144 t44 = t33 ^ t37 145 t45 = t42 ^ t41 146 z0 = t44 & y15 147 z1 = t37 & y6 148 z2 = t33 & x7 149 z3 = t43 & y16 150 z4 = t40 & y1 151 z5 = t29 & y7 152 z6 = t42 & y11 153 z7 = t45 & y17 154 z8 = t41 & y10 155 z9 = t44 & y12 156 z10 = t37 & y3 157 z11 = t33 & y4 158 z12 = t43 & y13 159 z13 = t40 & y5 160 z14 = t29 & y2 161 z15 = t42 & y9 162 z16 = t45 & y14 163 z17 = t41 & y8 164 165 // 166 // Bottom linear transformation. 167 // 168 t46 = z15 ^ z16 169 t47 = z10 ^ z11 170 t48 = z5 ^ z13 171 t49 = z9 ^ z10 172 t50 = z2 ^ z12 173 t51 = z2 ^ z5 174 t52 = z7 ^ z8 175 t53 = z0 ^ z3 176 t54 = z6 ^ z7 177 t55 = z16 ^ z17 178 t56 = z12 ^ t48 179 t57 = t50 ^ t53 180 t58 = z4 ^ t46 181 t59 = z3 ^ t54 182 t60 = t46 ^ t57 183 t61 = z14 ^ t57 184 t62 = t52 ^ t58 185 t63 = t49 ^ t58 186 t64 = z4 ^ t59 187 t65 = t61 ^ t62 188 t66 = z1 ^ t63 189 s0 = t59 ^ t63 190 s6 = t56 ^ (^t62) 191 s7 = t48 ^ (^t60) 192 t67 = t64 ^ t65 193 s3 = t53 ^ t66 194 s4 = t51 ^ t66 195 s5 = t47 ^ t65 196 s1 = t64 ^ (^s3) 197 s2 = t55 ^ (^t67) 198 199 q[7] = s0 200 q[6] = s1 201 q[5] = s2 202 q[4] = s3 203 q[3] = s4 204 q[2] = s5 205 q[1] = s6 206 q[0] = s7 207 } 208 209 func Ortho(q []uint64) { 210 _ = q[7] // Early bounds check. 211 212 const cl2, ch2 = 0x5555555555555555, 0xAAAAAAAAAAAAAAAA 213 q[0], q[1] = (q[0]&cl2)|((q[1]&cl2)<<1), ((q[0]&ch2)>>1)|(q[1]&ch2) 214 q[2], q[3] = (q[2]&cl2)|((q[3]&cl2)<<1), ((q[2]&ch2)>>1)|(q[3]&ch2) 215 q[4], q[5] = (q[4]&cl2)|((q[5]&cl2)<<1), ((q[4]&ch2)>>1)|(q[5]&ch2) 216 q[6], q[7] = (q[6]&cl2)|((q[7]&cl2)<<1), ((q[6]&ch2)>>1)|(q[7]&ch2) 217 218 const cl4, ch4 = 0x3333333333333333, 0xCCCCCCCCCCCCCCCC 219 q[0], q[2] = (q[0]&cl4)|((q[2]&cl4)<<2), ((q[0]&ch4)>>2)|(q[2]&ch4) 220 q[1], q[3] = (q[1]&cl4)|((q[3]&cl4)<<2), ((q[1]&ch4)>>2)|(q[3]&ch4) 221 q[4], q[6] = (q[4]&cl4)|((q[6]&cl4)<<2), ((q[4]&ch4)>>2)|(q[6]&ch4) 222 q[5], q[7] = (q[5]&cl4)|((q[7]&cl4)<<2), ((q[5]&ch4)>>2)|(q[7]&ch4) 223 224 const cl8, ch8 = 0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0 225 q[0], q[4] = (q[0]&cl8)|((q[4]&cl8)<<4), ((q[0]&ch8)>>4)|(q[4]&ch8) 226 q[1], q[5] = (q[1]&cl8)|((q[5]&cl8)<<4), ((q[1]&ch8)>>4)|(q[5]&ch8) 227 q[2], q[6] = (q[2]&cl8)|((q[6]&cl8)<<4), ((q[2]&ch8)>>4)|(q[6]&ch8) 228 q[3], q[7] = (q[3]&cl8)|((q[7]&cl8)<<4), ((q[3]&ch8)>>4)|(q[7]&ch8) 229 } 230 231 func InterleaveIn(q0, q1 *uint64, w []uint32) { 232 _ = w[3] 233 x0, x1, x2, x3 := uint64(w[0]), uint64(w[1]), uint64(w[2]), uint64(w[3]) 234 x0 |= (x0 << 16) 235 x1 |= (x1 << 16) 236 x2 |= (x2 << 16) 237 x3 |= (x3 << 16) 238 x0 &= 0x0000FFFF0000FFFF 239 x1 &= 0x0000FFFF0000FFFF 240 x2 &= 0x0000FFFF0000FFFF 241 x3 &= 0x0000FFFF0000FFFF 242 x0 |= (x0 << 8) 243 x1 |= (x1 << 8) 244 x2 |= (x2 << 8) 245 x3 |= (x3 << 8) 246 x0 &= 0x00FF00FF00FF00FF 247 x1 &= 0x00FF00FF00FF00FF 248 x2 &= 0x00FF00FF00FF00FF 249 x3 &= 0x00FF00FF00FF00FF 250 *q0 = x0 | (x2 << 8) 251 *q1 = x1 | (x3 << 8) 252 } 253 254 func InterleaveOut(w []uint32, q0, q1 uint64) { 255 var x0, x1, x2, x3 uint64 256 257 _ = w[3] 258 x0 = q0 & 0x00FF00FF00FF00FF 259 x1 = q1 & 0x00FF00FF00FF00FF 260 x2 = (q0 >> 8) & 0x00FF00FF00FF00FF 261 x3 = (q1 >> 8) & 0x00FF00FF00FF00FF 262 x0 |= (x0 >> 8) 263 x1 |= (x1 >> 8) 264 x2 |= (x2 >> 8) 265 x3 |= (x3 >> 8) 266 x0 &= 0x0000FFFF0000FFFF 267 x1 &= 0x0000FFFF0000FFFF 268 x2 &= 0x0000FFFF0000FFFF 269 x3 &= 0x0000FFFF0000FFFF 270 w[0] = uint32(x0) | uint32(x0>>16) 271 w[1] = uint32(x1) | uint32(x1>>16) 272 w[2] = uint32(x2) | uint32(x2>>16) 273 w[3] = uint32(x3) | uint32(x3>>16) 274 } 275 276 func AddRoundKey(q *[8]uint64, sk []uint64) { 277 _ = sk[7] 278 279 q[0] ^= sk[0] 280 q[1] ^= sk[1] 281 q[2] ^= sk[2] 282 q[3] ^= sk[3] 283 q[4] ^= sk[4] 284 q[5] ^= sk[5] 285 q[6] ^= sk[6] 286 q[7] ^= sk[7] 287 } 288 289 func subWord(x uint32) uint32 { 290 var q [8]uint64 291 292 q[0] = uint64(x) 293 Ortho(q[:]) 294 Sbox(&q) 295 Ortho(q[:]) 296 x = uint32(q[0]) 297 memwipeU64(q[:]) 298 return x 299 } 300 301 func Keysched(compSkey []uint64, key []byte) int { 302 numRounds := 0 303 keyLen := len(key) 304 switch keyLen { 305 case 16: 306 numRounds = 10 307 case 24: 308 numRounds = 12 309 case 32: 310 numRounds = 14 311 default: 312 panic("aes/impl32: Keysched: invalid key length") 313 } 314 315 var skey [60]uint32 316 nk := keyLen >> 2 317 nkf := (numRounds + 1) << 2 318 for i := 0; i < nk; i++ { 319 skey[i] = binary.LittleEndian.Uint32(key[i<<2:]) 320 } 321 tmp := skey[(keyLen>>2)-1] 322 for i, j, k := nk, 0, 0; i < nkf; i++ { 323 if j == 0 { 324 tmp = (tmp << 24) | (tmp >> 8) 325 tmp = subWord(tmp) ^ uint32(rcon[k]) 326 } else if nk > 6 && j == 4 { 327 tmp = subWord(tmp) 328 } 329 tmp ^= skey[i-nk] 330 skey[i] = tmp 331 if j++; j == nk { 332 j = 0 333 k++ 334 } 335 } 336 337 var q [8]uint64 338 for i, j := 0, 0; i < nkf; i, j = i+4, j+2 { 339 InterleaveIn(&q[0], &q[4], skey[i:]) 340 q[1] = q[0] 341 q[2] = q[0] 342 q[3] = q[0] 343 q[5] = q[4] 344 q[6] = q[4] 345 q[7] = q[4] 346 Ortho(q[:]) 347 compSkey[j+0] = (q[0] & 0x1111111111111111) | 348 (q[1] & 0x2222222222222222) | (q[2] & 0x4444444444444444) | 349 (q[3] & 0x8888888888888888) 350 compSkey[j+1] = (q[4] & 0x1111111111111111) | 351 (q[5] & 0x2222222222222222) | (q[6] & 0x4444444444444444) | 352 (q[7] & 0x8888888888888888) 353 } 354 355 for i := range skey { 356 skey[i] = 0 357 } 358 memwipeU64(q[:]) 359 360 return numRounds 361 } 362 363 func SkeyExpand(skey []uint64, numRounds int, compSkey []uint64) { 364 n := (numRounds + 1) << 1 365 for u, v := 0, 0; u < n; u, v = u+1, v+4 { 366 x0 := compSkey[u] 367 x1, x2, x3 := x0, x0, x0 368 x0 &= 0x1111111111111111 369 x1 &= 0x2222222222222222 370 x2 &= 0x4444444444444444 371 x3 &= 0x8888888888888888 372 x1 >>= 1 373 x2 >>= 2 374 x3 >>= 3 375 skey[v+0] = (x0 << 4) - x0 376 skey[v+1] = (x1 << 4) - x1 377 skey[v+2] = (x2 << 4) - x2 378 skey[v+3] = (x3 << 4) - x3 379 } 380 } 381 382 func RkeyOrtho(qq []uint64, key []byte) { 383 var skey [4]uint32 384 var compSkey [2]uint64 385 386 for i := 0; i < 4; i++ { 387 skey[i] = binary.LittleEndian.Uint32(key[i<<2:]) 388 } 389 390 var q [8]uint64 391 InterleaveIn(&q[0], &q[4], skey[:]) 392 q[1] = q[0] 393 q[2] = q[0] 394 q[3] = q[0] 395 q[5] = q[4] 396 q[6] = q[4] 397 q[7] = q[4] 398 Ortho(q[:]) 399 compSkey[0] = (q[0] & 0x1111111111111111) | 400 (q[1] & 0x2222222222222222) | (q[2] & 0x4444444444444444) | 401 (q[3] & 0x8888888888888888) 402 compSkey[1] = (q[4] & 0x1111111111111111) | 403 (q[5] & 0x2222222222222222) | (q[6] & 0x4444444444444444) | 404 (q[7] & 0x8888888888888888) 405 406 for u := 0; u < 2; u++ { 407 x0 := compSkey[u] 408 x1, x2, x3 := x0, x0, x0 409 x0 &= 0x1111111111111111 410 x1 &= 0x2222222222222222 411 x2 &= 0x4444444444444444 412 x3 &= 0x8888888888888888 413 x1 >>= 1 414 x2 >>= 2 415 x3 >>= 3 416 qq[u*4+0] = (x0 << 4) - x0 417 qq[u*4+1] = (x1 << 4) - x1 418 qq[u*4+2] = (x2 << 4) - x2 419 qq[u*4+3] = (x3 << 4) - x3 420 } 421 422 for i := range skey { 423 skey[i] = 0 424 } 425 memwipeU64(compSkey[:]) 426 memwipeU64(q[:]) 427 } 428 429 func Load4xU32(q *[8]uint64, src []byte) { 430 var w [4]uint32 431 432 w[0] = binary.LittleEndian.Uint32(src[:]) 433 w[1] = binary.LittleEndian.Uint32(src[4:]) 434 w[2] = binary.LittleEndian.Uint32(src[8:]) 435 w[3] = binary.LittleEndian.Uint32(src[12:]) 436 InterleaveIn(&q[0], &q[4], w[:]) 437 Ortho(q[:]) 438 } 439 440 func Load16xU32(q *[8]uint64, src0, src1, src2, src3 []byte) { 441 var w [4]uint32 442 443 src := [][]byte{src0, src1, src2, src3} 444 for i, s := range src { 445 w[0] = binary.LittleEndian.Uint32(s[:]) 446 w[1] = binary.LittleEndian.Uint32(s[4:]) 447 w[2] = binary.LittleEndian.Uint32(s[8:]) 448 w[3] = binary.LittleEndian.Uint32(s[12:]) 449 InterleaveIn(&q[i], &q[i+4], w[:]) 450 } 451 Ortho(q[:]) 452 } 453 454 func Store4xU32(dst []byte, q *[8]uint64) { 455 var w [4]uint32 456 457 Ortho(q[:]) 458 InterleaveOut(w[:], q[0], q[4]) 459 binary.LittleEndian.PutUint32(dst[:], w[0]) 460 binary.LittleEndian.PutUint32(dst[4:], w[1]) 461 binary.LittleEndian.PutUint32(dst[8:], w[2]) 462 binary.LittleEndian.PutUint32(dst[12:], w[3]) 463 } 464 465 func Store16xU32(dst0, dst1, dst2, dst3 []byte, q *[8]uint64) { 466 var w [4]uint32 467 468 dst := [][]byte{dst0, dst1, dst2, dst3} 469 Ortho(q[:]) 470 for i, d := range dst { 471 InterleaveOut(w[:], q[i], q[i+4]) 472 binary.LittleEndian.PutUint32(d[:], w[0]) 473 binary.LittleEndian.PutUint32(d[4:], w[1]) 474 binary.LittleEndian.PutUint32(d[8:], w[2]) 475 binary.LittleEndian.PutUint32(d[12:], w[3]) 476 } 477 } 478 479 func rotr32(x uint64) uint64 { 480 return (x << 32) | (x >> 32) 481 } 482 483 func memwipeU64(s []uint64) { 484 for i := range s { 485 s[i] = 0 486 } 487 } 488 489 type block struct { 490 modes.BlockModesImpl 491 492 skExp [120]uint64 493 numRounds int 494 wasReset bool 495 } 496 497 func (b *block) BlockSize() int { 498 return 16 499 } 500 501 func (b *block) Stride() int { 502 return 4 503 } 504 505 func (b *block) Encrypt(dst, src []byte) { 506 var q [8]uint64 507 508 if b.wasReset { 509 panic("bsaes/ct64: Encrypt() called after Reset()") 510 } 511 512 Load4xU32(&q, src[:]) 513 encrypt(b.numRounds, b.skExp[:], &q) 514 Store4xU32(dst[:], &q) 515 } 516 517 func (b *block) Decrypt(dst, src []byte) { 518 var q [8]uint64 519 520 if b.wasReset { 521 panic("bsaes/ct64: Decrypt() called after Reset()") 522 } 523 524 Load4xU32(&q, src[:]) 525 decrypt(b.numRounds, b.skExp[:], &q) 526 Store4xU32(dst[:], &q) 527 } 528 529 func (b *block) BulkEncrypt(dst, src []byte) { 530 var q [8]uint64 531 532 if b.wasReset { 533 panic("bsaes/ct64: BulkEncrypt() called after Reset()") 534 } 535 536 Load16xU32(&q, src[0:], src[16:], src[32:], src[48:]) 537 encrypt(b.numRounds, b.skExp[:], &q) 538 Store16xU32(dst[0:], dst[16:], dst[32:], dst[48:], &q) 539 } 540 541 func (b *block) BulkDecrypt(dst, src []byte) { 542 var q [8]uint64 543 544 if b.wasReset { 545 panic("bsaes/ct64: BulkDecrypt() called after Reset()") 546 } 547 548 Load16xU32(&q, src[0:], src[16:], src[32:], src[48:]) 549 decrypt(b.numRounds, b.skExp[:], &q) 550 Store16xU32(dst[0:], dst[16:], dst[32:], dst[48:], &q) 551 } 552 553 func (b *block) Reset() { 554 if !b.wasReset { 555 b.wasReset = true 556 memwipeU64(b.skExp[:]) 557 } 558 } 559 560 // NewCipher creates and returns a new cipher.Block, backed by a Impl64. 561 func NewCipher(key []byte) cipher.Block { 562 var skey [30]uint64 563 defer memwipeU64(skey[:]) 564 565 b := new(block) 566 b.numRounds = Keysched(skey[:], key) 567 SkeyExpand(b.skExp[:], b.numRounds, skey[:]) 568 569 b.BlockModesImpl.Init(b) 570 571 return b 572 }