github.com/Psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/crypto/Yawning/chacha20/chacha20_ref.go (about)

     1  // chacha20_ref.go - Reference ChaCha20.
     2  //
     3  // To the extent possible under law, Yawning Angel has waived all copyright
     4  // and related or neighboring rights to chacha20, using the Creative
     5  // Commons "CC0" public domain dedication. See LICENSE or
     6  // <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
     7  
     8  // +build !go1.9
     9  
    10  package chacha20
    11  
    12  import (
    13  	"encoding/binary"
    14  	"math"
    15  	"unsafe"
    16  )
    17  
    18  func blocksRef(x *[stateSize]uint32, in []byte, out []byte, nrBlocks int, isIetf bool) {
    19  	if isIetf {
    20  		var totalBlocks uint64
    21  		totalBlocks = uint64(x[12]) + uint64(nrBlocks)
    22  		if totalBlocks > math.MaxUint32 {
    23  			panic("chacha20: Exceeded keystream per nonce limit")
    24  		}
    25  	}
    26  
    27  	// This routine ignores x[0]...x[4] in favor the const values since it's
    28  	// ever so slightly faster.
    29  
    30  	for n := 0; n < nrBlocks; n++ {
    31  		x0, x1, x2, x3 := sigma0, sigma1, sigma2, sigma3
    32  		x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
    33  
    34  		for i := chachaRounds; i > 0; i -= 2 {
    35  			// quarterround(x, 0, 4, 8, 12)
    36  			x0 += x4
    37  			x12 ^= x0
    38  			x12 = (x12 << 16) | (x12 >> 16)
    39  			x8 += x12
    40  			x4 ^= x8
    41  			x4 = (x4 << 12) | (x4 >> 20)
    42  			x0 += x4
    43  			x12 ^= x0
    44  			x12 = (x12 << 8) | (x12 >> 24)
    45  			x8 += x12
    46  			x4 ^= x8
    47  			x4 = (x4 << 7) | (x4 >> 25)
    48  
    49  			// quarterround(x, 1, 5, 9, 13)
    50  			x1 += x5
    51  			x13 ^= x1
    52  			x13 = (x13 << 16) | (x13 >> 16)
    53  			x9 += x13
    54  			x5 ^= x9
    55  			x5 = (x5 << 12) | (x5 >> 20)
    56  			x1 += x5
    57  			x13 ^= x1
    58  			x13 = (x13 << 8) | (x13 >> 24)
    59  			x9 += x13
    60  			x5 ^= x9
    61  			x5 = (x5 << 7) | (x5 >> 25)
    62  
    63  			// quarterround(x, 2, 6, 10, 14)
    64  			x2 += x6
    65  			x14 ^= x2
    66  			x14 = (x14 << 16) | (x14 >> 16)
    67  			x10 += x14
    68  			x6 ^= x10
    69  			x6 = (x6 << 12) | (x6 >> 20)
    70  			x2 += x6
    71  			x14 ^= x2
    72  			x14 = (x14 << 8) | (x14 >> 24)
    73  			x10 += x14
    74  			x6 ^= x10
    75  			x6 = (x6 << 7) | (x6 >> 25)
    76  
    77  			// quarterround(x, 3, 7, 11, 15)
    78  			x3 += x7
    79  			x15 ^= x3
    80  			x15 = (x15 << 16) | (x15 >> 16)
    81  			x11 += x15
    82  			x7 ^= x11
    83  			x7 = (x7 << 12) | (x7 >> 20)
    84  			x3 += x7
    85  			x15 ^= x3
    86  			x15 = (x15 << 8) | (x15 >> 24)
    87  			x11 += x15
    88  			x7 ^= x11
    89  			x7 = (x7 << 7) | (x7 >> 25)
    90  
    91  			// quarterround(x, 0, 5, 10, 15)
    92  			x0 += x5
    93  			x15 ^= x0
    94  			x15 = (x15 << 16) | (x15 >> 16)
    95  			x10 += x15
    96  			x5 ^= x10
    97  			x5 = (x5 << 12) | (x5 >> 20)
    98  			x0 += x5
    99  			x15 ^= x0
   100  			x15 = (x15 << 8) | (x15 >> 24)
   101  			x10 += x15
   102  			x5 ^= x10
   103  			x5 = (x5 << 7) | (x5 >> 25)
   104  
   105  			// quarterround(x, 1, 6, 11, 12)
   106  			x1 += x6
   107  			x12 ^= x1
   108  			x12 = (x12 << 16) | (x12 >> 16)
   109  			x11 += x12
   110  			x6 ^= x11
   111  			x6 = (x6 << 12) | (x6 >> 20)
   112  			x1 += x6
   113  			x12 ^= x1
   114  			x12 = (x12 << 8) | (x12 >> 24)
   115  			x11 += x12
   116  			x6 ^= x11
   117  			x6 = (x6 << 7) | (x6 >> 25)
   118  
   119  			// quarterround(x, 2, 7, 8, 13)
   120  			x2 += x7
   121  			x13 ^= x2
   122  			x13 = (x13 << 16) | (x13 >> 16)
   123  			x8 += x13
   124  			x7 ^= x8
   125  			x7 = (x7 << 12) | (x7 >> 20)
   126  			x2 += x7
   127  			x13 ^= x2
   128  			x13 = (x13 << 8) | (x13 >> 24)
   129  			x8 += x13
   130  			x7 ^= x8
   131  			x7 = (x7 << 7) | (x7 >> 25)
   132  
   133  			// quarterround(x, 3, 4, 9, 14)
   134  			x3 += x4
   135  			x14 ^= x3
   136  			x14 = (x14 << 16) | (x14 >> 16)
   137  			x9 += x14
   138  			x4 ^= x9
   139  			x4 = (x4 << 12) | (x4 >> 20)
   140  			x3 += x4
   141  			x14 ^= x3
   142  			x14 = (x14 << 8) | (x14 >> 24)
   143  			x9 += x14
   144  			x4 ^= x9
   145  			x4 = (x4 << 7) | (x4 >> 25)
   146  		}
   147  
   148  		// On amd64 at least, this is a rather big boost.
   149  		if useUnsafe {
   150  			if in != nil {
   151  				inArr := (*[16]uint32)(unsafe.Pointer(&in[n*BlockSize]))
   152  				outArr := (*[16]uint32)(unsafe.Pointer(&out[n*BlockSize]))
   153  				outArr[0] = inArr[0] ^ (x0 + sigma0)
   154  				outArr[1] = inArr[1] ^ (x1 + sigma1)
   155  				outArr[2] = inArr[2] ^ (x2 + sigma2)
   156  				outArr[3] = inArr[3] ^ (x3 + sigma3)
   157  				outArr[4] = inArr[4] ^ (x4 + x[4])
   158  				outArr[5] = inArr[5] ^ (x5 + x[5])
   159  				outArr[6] = inArr[6] ^ (x6 + x[6])
   160  				outArr[7] = inArr[7] ^ (x7 + x[7])
   161  				outArr[8] = inArr[8] ^ (x8 + x[8])
   162  				outArr[9] = inArr[9] ^ (x9 + x[9])
   163  				outArr[10] = inArr[10] ^ (x10 + x[10])
   164  				outArr[11] = inArr[11] ^ (x11 + x[11])
   165  				outArr[12] = inArr[12] ^ (x12 + x[12])
   166  				outArr[13] = inArr[13] ^ (x13 + x[13])
   167  				outArr[14] = inArr[14] ^ (x14 + x[14])
   168  				outArr[15] = inArr[15] ^ (x15 + x[15])
   169  			} else {
   170  				outArr := (*[16]uint32)(unsafe.Pointer(&out[n*BlockSize]))
   171  				outArr[0] = x0 + sigma0
   172  				outArr[1] = x1 + sigma1
   173  				outArr[2] = x2 + sigma2
   174  				outArr[3] = x3 + sigma3
   175  				outArr[4] = x4 + x[4]
   176  				outArr[5] = x5 + x[5]
   177  				outArr[6] = x6 + x[6]
   178  				outArr[7] = x7 + x[7]
   179  				outArr[8] = x8 + x[8]
   180  				outArr[9] = x9 + x[9]
   181  				outArr[10] = x10 + x[10]
   182  				outArr[11] = x11 + x[11]
   183  				outArr[12] = x12 + x[12]
   184  				outArr[13] = x13 + x[13]
   185  				outArr[14] = x14 + x[14]
   186  				outArr[15] = x15 + x[15]
   187  			}
   188  		} else {
   189  			// Slow path, either the architecture cares about alignment, or is not little endian.
   190  			x0 += sigma0
   191  			x1 += sigma1
   192  			x2 += sigma2
   193  			x3 += sigma3
   194  			x4 += x[4]
   195  			x5 += x[5]
   196  			x6 += x[6]
   197  			x7 += x[7]
   198  			x8 += x[8]
   199  			x9 += x[9]
   200  			x10 += x[10]
   201  			x11 += x[11]
   202  			x12 += x[12]
   203  			x13 += x[13]
   204  			x14 += x[14]
   205  			x15 += x[15]
   206  			if in != nil {
   207  				binary.LittleEndian.PutUint32(out[0:4], binary.LittleEndian.Uint32(in[0:4])^x0)
   208  				binary.LittleEndian.PutUint32(out[4:8], binary.LittleEndian.Uint32(in[4:8])^x1)
   209  				binary.LittleEndian.PutUint32(out[8:12], binary.LittleEndian.Uint32(in[8:12])^x2)
   210  				binary.LittleEndian.PutUint32(out[12:16], binary.LittleEndian.Uint32(in[12:16])^x3)
   211  				binary.LittleEndian.PutUint32(out[16:20], binary.LittleEndian.Uint32(in[16:20])^x4)
   212  				binary.LittleEndian.PutUint32(out[20:24], binary.LittleEndian.Uint32(in[20:24])^x5)
   213  				binary.LittleEndian.PutUint32(out[24:28], binary.LittleEndian.Uint32(in[24:28])^x6)
   214  				binary.LittleEndian.PutUint32(out[28:32], binary.LittleEndian.Uint32(in[28:32])^x7)
   215  				binary.LittleEndian.PutUint32(out[32:36], binary.LittleEndian.Uint32(in[32:36])^x8)
   216  				binary.LittleEndian.PutUint32(out[36:40], binary.LittleEndian.Uint32(in[36:40])^x9)
   217  				binary.LittleEndian.PutUint32(out[40:44], binary.LittleEndian.Uint32(in[40:44])^x10)
   218  				binary.LittleEndian.PutUint32(out[44:48], binary.LittleEndian.Uint32(in[44:48])^x11)
   219  				binary.LittleEndian.PutUint32(out[48:52], binary.LittleEndian.Uint32(in[48:52])^x12)
   220  				binary.LittleEndian.PutUint32(out[52:56], binary.LittleEndian.Uint32(in[52:56])^x13)
   221  				binary.LittleEndian.PutUint32(out[56:60], binary.LittleEndian.Uint32(in[56:60])^x14)
   222  				binary.LittleEndian.PutUint32(out[60:64], binary.LittleEndian.Uint32(in[60:64])^x15)
   223  				in = in[BlockSize:]
   224  			} else {
   225  				binary.LittleEndian.PutUint32(out[0:4], x0)
   226  				binary.LittleEndian.PutUint32(out[4:8], x1)
   227  				binary.LittleEndian.PutUint32(out[8:12], x2)
   228  				binary.LittleEndian.PutUint32(out[12:16], x3)
   229  				binary.LittleEndian.PutUint32(out[16:20], x4)
   230  				binary.LittleEndian.PutUint32(out[20:24], x5)
   231  				binary.LittleEndian.PutUint32(out[24:28], x6)
   232  				binary.LittleEndian.PutUint32(out[28:32], x7)
   233  				binary.LittleEndian.PutUint32(out[32:36], x8)
   234  				binary.LittleEndian.PutUint32(out[36:40], x9)
   235  				binary.LittleEndian.PutUint32(out[40:44], x10)
   236  				binary.LittleEndian.PutUint32(out[44:48], x11)
   237  				binary.LittleEndian.PutUint32(out[48:52], x12)
   238  				binary.LittleEndian.PutUint32(out[52:56], x13)
   239  				binary.LittleEndian.PutUint32(out[56:60], x14)
   240  				binary.LittleEndian.PutUint32(out[60:64], x15)
   241  			}
   242  			out = out[BlockSize:]
   243  		}
   244  
   245  		// Stoping at 2^70 bytes per nonce is the user's responsibility.
   246  		ctr := uint64(x[13])<<32 | uint64(x[12])
   247  		ctr++
   248  		x[12] = uint32(ctr)
   249  		x[13] = uint32(ctr >> 32)
   250  	}
   251  }
   252  
   253  func hChaChaRef(x *[stateSize]uint32, out *[32]byte) {
   254  	x0, x1, x2, x3 := sigma0, sigma1, sigma2, sigma3
   255  	x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11]
   256  
   257  	for i := chachaRounds; i > 0; i -= 2 {
   258  		// quarterround(x, 0, 4, 8, 12)
   259  		x0 += x4
   260  		x12 ^= x0
   261  		x12 = (x12 << 16) | (x12 >> 16)
   262  		x8 += x12
   263  		x4 ^= x8
   264  		x4 = (x4 << 12) | (x4 >> 20)
   265  		x0 += x4
   266  		x12 ^= x0
   267  		x12 = (x12 << 8) | (x12 >> 24)
   268  		x8 += x12
   269  		x4 ^= x8
   270  		x4 = (x4 << 7) | (x4 >> 25)
   271  
   272  		// quarterround(x, 1, 5, 9, 13)
   273  		x1 += x5
   274  		x13 ^= x1
   275  		x13 = (x13 << 16) | (x13 >> 16)
   276  		x9 += x13
   277  		x5 ^= x9
   278  		x5 = (x5 << 12) | (x5 >> 20)
   279  		x1 += x5
   280  		x13 ^= x1
   281  		x13 = (x13 << 8) | (x13 >> 24)
   282  		x9 += x13
   283  		x5 ^= x9
   284  		x5 = (x5 << 7) | (x5 >> 25)
   285  
   286  		// quarterround(x, 2, 6, 10, 14)
   287  		x2 += x6
   288  		x14 ^= x2
   289  		x14 = (x14 << 16) | (x14 >> 16)
   290  		x10 += x14
   291  		x6 ^= x10
   292  		x6 = (x6 << 12) | (x6 >> 20)
   293  		x2 += x6
   294  		x14 ^= x2
   295  		x14 = (x14 << 8) | (x14 >> 24)
   296  		x10 += x14
   297  		x6 ^= x10
   298  		x6 = (x6 << 7) | (x6 >> 25)
   299  
   300  		// quarterround(x, 3, 7, 11, 15)
   301  		x3 += x7
   302  		x15 ^= x3
   303  		x15 = (x15 << 16) | (x15 >> 16)
   304  		x11 += x15
   305  		x7 ^= x11
   306  		x7 = (x7 << 12) | (x7 >> 20)
   307  		x3 += x7
   308  		x15 ^= x3
   309  		x15 = (x15 << 8) | (x15 >> 24)
   310  		x11 += x15
   311  		x7 ^= x11
   312  		x7 = (x7 << 7) | (x7 >> 25)
   313  
   314  		// quarterround(x, 0, 5, 10, 15)
   315  		x0 += x5
   316  		x15 ^= x0
   317  		x15 = (x15 << 16) | (x15 >> 16)
   318  		x10 += x15
   319  		x5 ^= x10
   320  		x5 = (x5 << 12) | (x5 >> 20)
   321  		x0 += x5
   322  		x15 ^= x0
   323  		x15 = (x15 << 8) | (x15 >> 24)
   324  		x10 += x15
   325  		x5 ^= x10
   326  		x5 = (x5 << 7) | (x5 >> 25)
   327  
   328  		// quarterround(x, 1, 6, 11, 12)
   329  		x1 += x6
   330  		x12 ^= x1
   331  		x12 = (x12 << 16) | (x12 >> 16)
   332  		x11 += x12
   333  		x6 ^= x11
   334  		x6 = (x6 << 12) | (x6 >> 20)
   335  		x1 += x6
   336  		x12 ^= x1
   337  		x12 = (x12 << 8) | (x12 >> 24)
   338  		x11 += x12
   339  		x6 ^= x11
   340  		x6 = (x6 << 7) | (x6 >> 25)
   341  
   342  		// quarterround(x, 2, 7, 8, 13)
   343  		x2 += x7
   344  		x13 ^= x2
   345  		x13 = (x13 << 16) | (x13 >> 16)
   346  		x8 += x13
   347  		x7 ^= x8
   348  		x7 = (x7 << 12) | (x7 >> 20)
   349  		x2 += x7
   350  		x13 ^= x2
   351  		x13 = (x13 << 8) | (x13 >> 24)
   352  		x8 += x13
   353  		x7 ^= x8
   354  		x7 = (x7 << 7) | (x7 >> 25)
   355  
   356  		// quarterround(x, 3, 4, 9, 14)
   357  		x3 += x4
   358  		x14 ^= x3
   359  		x14 = (x14 << 16) | (x14 >> 16)
   360  		x9 += x14
   361  		x4 ^= x9
   362  		x4 = (x4 << 12) | (x4 >> 20)
   363  		x3 += x4
   364  		x14 ^= x3
   365  		x14 = (x14 << 8) | (x14 >> 24)
   366  		x9 += x14
   367  		x4 ^= x9
   368  		x4 = (x4 << 7) | (x4 >> 25)
   369  	}
   370  
   371  	// HChaCha returns x0...x3 | x12...x15, which corresponds to the
   372  	// indexes of the ChaCha constant and the indexes of the IV.
   373  	if useUnsafe {
   374  		outArr := (*[16]uint32)(unsafe.Pointer(&out[0]))
   375  		outArr[0] = x0
   376  		outArr[1] = x1
   377  		outArr[2] = x2
   378  		outArr[3] = x3
   379  		outArr[4] = x12
   380  		outArr[5] = x13
   381  		outArr[6] = x14
   382  		outArr[7] = x15
   383  	} else {
   384  		binary.LittleEndian.PutUint32(out[0:4], x0)
   385  		binary.LittleEndian.PutUint32(out[4:8], x1)
   386  		binary.LittleEndian.PutUint32(out[8:12], x2)
   387  		binary.LittleEndian.PutUint32(out[12:16], x3)
   388  		binary.LittleEndian.PutUint32(out[16:20], x12)
   389  		binary.LittleEndian.PutUint32(out[20:24], x13)
   390  		binary.LittleEndian.PutUint32(out[24:28], x14)
   391  		binary.LittleEndian.PutUint32(out[28:32], x15)
   392  	}
   393  	return
   394  }