// chacha20_ref.go - Reference ChaCha20.
//
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to chacha20, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.

// +build !go1.9

package chacha20

import (
	"encoding/binary"
	"math"
	"unsafe"
)

// blocksRef generates nrBlocks 64-byte blocks of ChaCha keystream from the
// state x.  When in is non-nil, the keystream is XORed with in and the result
// written to out (encrypt/decrypt); when in is nil, the raw keystream is
// written to out.  The block counter stored in x[12] (low word) and x[13]
// (high word) is incremented once per block.  For the IETF variant only x[12]
// is the counter, so exceeding 2^32 blocks under one nonce panics.
func blocksRef(x *[stateSize]uint32, in []byte, out []byte, nrBlocks int, isIetf bool) {
	if isIetf {
		// The IETF variant has only a 32-bit block counter; refuse to wrap it.
		var totalBlocks uint64
		totalBlocks = uint64(x[12]) + uint64(nrBlocks)
		if totalBlocks > math.MaxUint32 {
			panic("chacha20: Exceeded keystream per nonce limit")
		}
	}

	// This routine ignores x[0]...x[3] in favor of the const values since it's
	// ever so slightly faster.

	for n := 0; n < nrBlocks; n++ {
		// Load the working state; x0..x3 are the fixed ChaCha constants.
		x0, x1, x2, x3 := sigma0, sigma1, sigma2, sigma3
		x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]

		// Each iteration performs one column round and one diagonal round
		// (a "double round"), with all eight quarter rounds fully unrolled.
		for i := chachaRounds; i > 0; i -= 2 {
			// quarterround(x, 0, 4, 8, 12)
			x0 += x4
			x12 ^= x0
			x12 = (x12 << 16) | (x12 >> 16)
			x8 += x12
			x4 ^= x8
			x4 = (x4 << 12) | (x4 >> 20)
			x0 += x4
			x12 ^= x0
			x12 = (x12 << 8) | (x12 >> 24)
			x8 += x12
			x4 ^= x8
			x4 = (x4 << 7) | (x4 >> 25)

			// quarterround(x, 1, 5, 9, 13)
			x1 += x5
			x13 ^= x1
			x13 = (x13 << 16) | (x13 >> 16)
			x9 += x13
			x5 ^= x9
			x5 = (x5 << 12) | (x5 >> 20)
			x1 += x5
			x13 ^= x1
			x13 = (x13 << 8) | (x13 >> 24)
			x9 += x13
			x5 ^= x9
			x5 = (x5 << 7) | (x5 >> 25)

			// quarterround(x, 2, 6, 10, 14)
			x2 += x6
			x14 ^= x2
			x14 = (x14 << 16) | (x14 >> 16)
			x10 += x14
			x6 ^= x10
			x6 = (x6 << 12) | (x6 >> 20)
			x2 += x6
			x14 ^= x2
			x14 = (x14 << 8) | (x14 >> 24)
			x10 += x14
			x6 ^= x10
			x6 = (x6 << 7) | (x6 >> 25)

			// quarterround(x, 3, 7, 11, 15)
			x3 += x7
			x15 ^= x3
			x15 = (x15 << 16) | (x15 >> 16)
			x11 += x15
			x7 ^= x11
			x7 = (x7 << 12) | (x7 >> 20)
			x3 += x7
			x15 ^= x3
			x15 = (x15 << 8) | (x15 >> 24)
			x11 += x15
			x7 ^= x11
			x7 = (x7 << 7) | (x7 >> 25)

			// quarterround(x, 0, 5, 10, 15)
			x0 += x5
			x15 ^= x0
			x15 = (x15 << 16) | (x15 >> 16)
			x10 += x15
			x5 ^= x10
			x5 = (x5 << 12) | (x5 >> 20)
			x0 += x5
			x15 ^= x0
			x15 = (x15 << 8) | (x15 >> 24)
			x10 += x15
			x5 ^= x10
			x5 = (x5 << 7) | (x5 >> 25)

			// quarterround(x, 1, 6, 11, 12)
			x1 += x6
			x12 ^= x1
			x12 = (x12 << 16) | (x12 >> 16)
			x11 += x12
			x6 ^= x11
			x6 = (x6 << 12) | (x6 >> 20)
			x1 += x6
			x12 ^= x1
			x12 = (x12 << 8) | (x12 >> 24)
			x11 += x12
			x6 ^= x11
			x6 = (x6 << 7) | (x6 >> 25)

			// quarterround(x, 2, 7, 8, 13)
			x2 += x7
			x13 ^= x2
			x13 = (x13 << 16) | (x13 >> 16)
			x8 += x13
			x7 ^= x8
			x7 = (x7 << 12) | (x7 >> 20)
			x2 += x7
			x13 ^= x2
			x13 = (x13 << 8) | (x13 >> 24)
			x8 += x13
			x7 ^= x8
			x7 = (x7 << 7) | (x7 >> 25)

			// quarterround(x, 3, 4, 9, 14)
			x3 += x4
			x14 ^= x3
			x14 = (x14 << 16) | (x14 >> 16)
			x9 += x14
			x4 ^= x9
			x4 = (x4 << 12) | (x4 >> 20)
			x3 += x4
			x14 ^= x3
			x14 = (x14 << 8) | (x14 >> 24)
			x9 += x14
			x4 ^= x9
			x4 = (x4 << 7) | (x4 >> 25)
		}

		// On amd64 at least, this is a rather big boost.
		if useUnsafe {
			// Fast path: reinterpret the byte slices as word arrays and do the
			// final state addition and XOR word-at-a-time.  The slices are not
			// advanced here; blocks are addressed via n*BlockSize instead.
			if in != nil {
				inArr := (*[16]uint32)(unsafe.Pointer(&in[n*BlockSize]))
				outArr := (*[16]uint32)(unsafe.Pointer(&out[n*BlockSize]))
				outArr[0] = inArr[0] ^ (x0 + sigma0)
				outArr[1] = inArr[1] ^ (x1 + sigma1)
				outArr[2] = inArr[2] ^ (x2 + sigma2)
				outArr[3] = inArr[3] ^ (x3 + sigma3)
				outArr[4] = inArr[4] ^ (x4 + x[4])
				outArr[5] = inArr[5] ^ (x5 + x[5])
				outArr[6] = inArr[6] ^ (x6 + x[6])
				outArr[7] = inArr[7] ^ (x7 + x[7])
				outArr[8] = inArr[8] ^ (x8 + x[8])
				outArr[9] = inArr[9] ^ (x9 + x[9])
				outArr[10] = inArr[10] ^ (x10 + x[10])
				outArr[11] = inArr[11] ^ (x11 + x[11])
				outArr[12] = inArr[12] ^ (x12 + x[12])
				outArr[13] = inArr[13] ^ (x13 + x[13])
				outArr[14] = inArr[14] ^ (x14 + x[14])
				outArr[15] = inArr[15] ^ (x15 + x[15])
			} else {
				outArr := (*[16]uint32)(unsafe.Pointer(&out[n*BlockSize]))
				outArr[0] = x0 + sigma0
				outArr[1] = x1 + sigma1
				outArr[2] = x2 + sigma2
				outArr[3] = x3 + sigma3
				outArr[4] = x4 + x[4]
				outArr[5] = x5 + x[5]
				outArr[6] = x6 + x[6]
				outArr[7] = x7 + x[7]
				outArr[8] = x8 + x[8]
				outArr[9] = x9 + x[9]
				outArr[10] = x10 + x[10]
				outArr[11] = x11 + x[11]
				outArr[12] = x12 + x[12]
				outArr[13] = x13 + x[13]
				outArr[14] = x14 + x[14]
				outArr[15] = x15 + x[15]
			}
		} else {
			// Slow path, either the architecture cares about alignment, or is not little endian.
			// Do the final state addition first, then serialize with
			// encoding/binary; here the in/out slices ARE advanced per block.
			x0 += sigma0
			x1 += sigma1
			x2 += sigma2
			x3 += sigma3
			x4 += x[4]
			x5 += x[5]
			x6 += x[6]
			x7 += x[7]
			x8 += x[8]
			x9 += x[9]
			x10 += x[10]
			x11 += x[11]
			x12 += x[12]
			x13 += x[13]
			x14 += x[14]
			x15 += x[15]
			if in != nil {
				binary.LittleEndian.PutUint32(out[0:4], binary.LittleEndian.Uint32(in[0:4])^x0)
				binary.LittleEndian.PutUint32(out[4:8], binary.LittleEndian.Uint32(in[4:8])^x1)
				binary.LittleEndian.PutUint32(out[8:12], binary.LittleEndian.Uint32(in[8:12])^x2)
				binary.LittleEndian.PutUint32(out[12:16], binary.LittleEndian.Uint32(in[12:16])^x3)
				binary.LittleEndian.PutUint32(out[16:20], binary.LittleEndian.Uint32(in[16:20])^x4)
				binary.LittleEndian.PutUint32(out[20:24], binary.LittleEndian.Uint32(in[20:24])^x5)
				binary.LittleEndian.PutUint32(out[24:28], binary.LittleEndian.Uint32(in[24:28])^x6)
				binary.LittleEndian.PutUint32(out[28:32], binary.LittleEndian.Uint32(in[28:32])^x7)
				binary.LittleEndian.PutUint32(out[32:36], binary.LittleEndian.Uint32(in[32:36])^x8)
				binary.LittleEndian.PutUint32(out[36:40], binary.LittleEndian.Uint32(in[36:40])^x9)
				binary.LittleEndian.PutUint32(out[40:44], binary.LittleEndian.Uint32(in[40:44])^x10)
				binary.LittleEndian.PutUint32(out[44:48], binary.LittleEndian.Uint32(in[44:48])^x11)
				binary.LittleEndian.PutUint32(out[48:52], binary.LittleEndian.Uint32(in[48:52])^x12)
				binary.LittleEndian.PutUint32(out[52:56], binary.LittleEndian.Uint32(in[52:56])^x13)
				binary.LittleEndian.PutUint32(out[56:60], binary.LittleEndian.Uint32(in[56:60])^x14)
				binary.LittleEndian.PutUint32(out[60:64], binary.LittleEndian.Uint32(in[60:64])^x15)
				in = in[BlockSize:]
			} else {
				binary.LittleEndian.PutUint32(out[0:4], x0)
				binary.LittleEndian.PutUint32(out[4:8], x1)
				binary.LittleEndian.PutUint32(out[8:12], x2)
				binary.LittleEndian.PutUint32(out[12:16], x3)
				binary.LittleEndian.PutUint32(out[16:20], x4)
				binary.LittleEndian.PutUint32(out[20:24], x5)
				binary.LittleEndian.PutUint32(out[24:28], x6)
				binary.LittleEndian.PutUint32(out[28:32], x7)
				binary.LittleEndian.PutUint32(out[32:36], x8)
				binary.LittleEndian.PutUint32(out[36:40], x9)
				binary.LittleEndian.PutUint32(out[40:44], x10)
				binary.LittleEndian.PutUint32(out[44:48], x11)
				binary.LittleEndian.PutUint32(out[48:52], x12)
				binary.LittleEndian.PutUint32(out[52:56], x13)
				binary.LittleEndian.PutUint32(out[56:60], x14)
				binary.LittleEndian.PutUint32(out[60:64], x15)
			}
			out = out[BlockSize:]
		}

		// Stopping at 2^70 bytes per nonce is the user's responsibility.
		// Increment the 64-bit counter held in x[12] (low) / x[13] (high).
		ctr := uint64(x[13])<<32 | uint64(x[12])
		ctr++
		x[12] = uint32(ctr)
		x[13] = uint32(ctr >> 32)
	}
}

// hChaChaRef is the HChaCha core: it loads the ChaCha constants plus the
// twelve input words x[0]...x[11] into the working state, runs chachaRounds
// rounds, and writes words 0..3 and 12..15 of the result to out (32 bytes).
// Unlike blocksRef, the final feed-forward state addition is omitted.
func hChaChaRef(x *[stateSize]uint32, out *[32]byte) {
	x0, x1, x2, x3 := sigma0, sigma1, sigma2, sigma3
	x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11]

	// One column round plus one diagonal round per iteration, fully unrolled.
	for i := chachaRounds; i > 0; i -= 2 {
		// quarterround(x, 0, 4, 8, 12)
		x0 += x4
		x12 ^= x0
		x12 = (x12 << 16) | (x12 >> 16)
		x8 += x12
		x4 ^= x8
		x4 = (x4 << 12) | (x4 >> 20)
		x0 += x4
		x12 ^= x0
		x12 = (x12 << 8) | (x12 >> 24)
		x8 += x12
		x4 ^= x8
		x4 = (x4 << 7) | (x4 >> 25)

		// quarterround(x, 1, 5, 9, 13)
		x1 += x5
		x13 ^= x1
		x13 = (x13 << 16) | (x13 >> 16)
		x9 += x13
		x5 ^= x9
		x5 = (x5 << 12) | (x5 >> 20)
		x1 += x5
		x13 ^= x1
		x13 = (x13 << 8) | (x13 >> 24)
		x9 += x13
		x5 ^= x9
		x5 = (x5 << 7) | (x5 >> 25)

		// quarterround(x, 2, 6, 10, 14)
		x2 += x6
		x14 ^= x2
		x14 = (x14 << 16) | (x14 >> 16)
		x10 += x14
		x6 ^= x10
		x6 = (x6 << 12) | (x6 >> 20)
		x2 += x6
		x14 ^= x2
		x14 = (x14 << 8) | (x14 >> 24)
		x10 += x14
		x6 ^= x10
		x6 = (x6 << 7) | (x6 >> 25)

		// quarterround(x, 3, 7, 11, 15)
		x3 += x7
		x15 ^= x3
		x15 = (x15 << 16) | (x15 >> 16)
		x11 += x15
		x7 ^= x11
		x7 = (x7 << 12) | (x7 >> 20)
		x3 += x7
		x15 ^= x3
		x15 = (x15 << 8) | (x15 >> 24)
		x11 += x15
		x7 ^= x11
		x7 = (x7 << 7) | (x7 >> 25)

		// quarterround(x, 0, 5, 10, 15)
		x0 += x5
		x15 ^= x0
		x15 = (x15 << 16) | (x15 >> 16)
		x10 += x15
		x5 ^= x10
		x5 = (x5 << 12) | (x5 >> 20)
		x0 += x5
		x15 ^= x0
		x15 = (x15 << 8) | (x15 >> 24)
		x10 += x15
		x5 ^= x10
		x5 = (x5 << 7) | (x5 >> 25)

		// quarterround(x, 1, 6, 11, 12)
		x1 += x6
		x12 ^= x1
		x12 = (x12 << 16) | (x12 >> 16)
		x11 += x12
		x6 ^= x11
		x6 = (x6 << 12) | (x6 >> 20)
		x1 += x6
		x12 ^= x1
		x12 = (x12 << 8) | (x12 >> 24)
		x11 += x12
		x6 ^= x11
		x6 = (x6 << 7) | (x6 >> 25)

		// quarterround(x, 2, 7, 8, 13)
		x2 += x7
		x13 ^= x2
		x13 = (x13 << 16) | (x13 >> 16)
		x8 += x13
		x7 ^= x8
		x7 = (x7 << 12) | (x7 >> 20)
		x2 += x7
		x13 ^= x2
		x13 = (x13 << 8) | (x13 >> 24)
		x8 += x13
		x7 ^= x8
		x7 = (x7 << 7) | (x7 >> 25)

		// quarterround(x, 3, 4, 9, 14)
		x3 += x4
		x14 ^= x3
		x14 = (x14 << 16) | (x14 >> 16)
		x9 += x14
		x4 ^= x9
		x4 = (x4 << 12) | (x4 >> 20)
		x3 += x4
		x14 ^= x3
		x14 = (x14 << 8) | (x14 >> 24)
		x9 += x14
		x4 ^= x9
		x4 = (x4 << 7) | (x4 >> 25)
	}

	// HChaCha returns x0...x3 | x12...x15, which corresponds to the
	// indexes of the ChaCha constant and the indexes of the IV.
	if useUnsafe {
		// Fast path: word-at-a-time store via an unsafe cast of out.
		outArr := (*[16]uint32)(unsafe.Pointer(&out[0]))
		outArr[0] = x0
		outArr[1] = x1
		outArr[2] = x2
		outArr[3] = x3
		outArr[4] = x12
		outArr[5] = x13
		outArr[6] = x14
		outArr[7] = x15
	} else {
		binary.LittleEndian.PutUint32(out[0:4], x0)
		binary.LittleEndian.PutUint32(out[4:8], x1)
		binary.LittleEndian.PutUint32(out[8:12], x2)
		binary.LittleEndian.PutUint32(out[12:16], x3)
		binary.LittleEndian.PutUint32(out[16:20], x12)
		binary.LittleEndian.PutUint32(out[20:24], x13)
		binary.LittleEndian.PutUint32(out[24:28], x14)
		binary.LittleEndian.PutUint32(out[28:32], x15)
	}
	return
}