github.com/Asutorufa/yuhaiin@v0.3.6-0.20240502055049-7984da7023a0/pkg/net/proxy/shadowsocksr/cipher/camellia/camellia.go (about)

     1  // Use of this source code is governed by a license
     2  // that can be found in the LICENSE file.
     3  
     4  // Package camellia implements the camellia block cipher.
     5  // The cipher has block size of 128 bit (16 byte) and
     6  // accepts 128, 192 or 256 bit keys (16, 24, 32 byte).
     7  // Camellia was jointly developed by Mitsubishi Electric
     8  // and NTT of Japan.
     9  // Camellia was added to many crypto protocols (e.g. TLS).
    10  package camellia
    11  
    12  import (
    13  	"crypto/cipher"
    14  	"fmt"
    15  )
    16  
// BlockSize is the block size of the camellia block cipher in bytes.
const BlockSize = 16
    19  
    20  // from github.com/enceve/crypto/camellia/
    21  // NewCipher returns a new cipher.Block implementing the camellia cipher.
    22  // The key argument must be 128, 192 or 256 bit (16, 24, 32 byte).
    23  func NewCipher(key []byte) (cipher.Block, error) {
    24  	k := len(key)
    25  	if k == 16 {
    26  		c := new(blockCipher128)
    27  		c.keySchedule(key)
    28  		return c, nil
    29  	}
    30  	if k == 24 || k == 32 {
    31  		c := new(blockCipher256)
    32  		c.keySchedule(key)
    33  		return c, nil
    34  	}
    35  	return nil, fmt.Errorf("key size error: %d", len(key))
    36  }
    37  
// blockCipher128 is the camellia cipher for 128 bit keys.
type blockCipher128 struct {
	sk [52]uint32 // The 52 32-bit subkeys, filled in by keySchedule.
}
    42  
    43  func (c *blockCipher128) BlockSize() int { return BlockSize }
    44  
    45  func (c *blockCipher128) Encrypt(dst, src []byte) {
    46  	if len(src) < BlockSize {
    47  		panic("camellia: src buffer to small")
    48  	}
    49  	if len(dst) < BlockSize {
    50  		panic("camellia: dst buffer to small")
    51  	}
    52  
    53  	r0 := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
    54  	r1 := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
    55  	r2 := uint32(src[8])<<24 | uint32(src[9])<<16 | uint32(src[10])<<8 | uint32(src[11])
    56  	r3 := uint32(src[12])<<24 | uint32(src[13])<<16 | uint32(src[14])<<8 | uint32(src[15])
    57  
    58  	k := &(c.sk)
    59  
    60  	r0 ^= k[0]
    61  	r1 ^= k[1]
    62  	r2 ^= k[2]
    63  	r3 ^= k[3]
    64  
    65  	f(&r0, &r1, &r2, &r3, k[4], k[5])
    66  	f(&r2, &r3, &r0, &r1, k[6], k[7])
    67  	f(&r0, &r1, &r2, &r3, k[8], k[9])
    68  	f(&r2, &r3, &r0, &r1, k[10], k[11])
    69  	f(&r0, &r1, &r2, &r3, k[12], k[13])
    70  	f(&r2, &r3, &r0, &r1, k[14], k[15])
    71  
    72  	t := r0 & k[16]
    73  	r1 ^= (t << 1) | (t >> (32 - 1))
    74  	r2 ^= r3 | k[19]
    75  	r0 ^= r1 | k[17]
    76  	t = r2 & k[18]
    77  	r3 ^= (t << 1) | (t >> (32 - 1))
    78  
    79  	f(&r0, &r1, &r2, &r3, k[20], k[21])
    80  	f(&r2, &r3, &r0, &r1, k[22], k[23])
    81  	f(&r0, &r1, &r2, &r3, k[24], k[25])
    82  	f(&r2, &r3, &r0, &r1, k[26], k[27])
    83  	f(&r0, &r1, &r2, &r3, k[28], k[29])
    84  	f(&r2, &r3, &r0, &r1, k[30], k[31])
    85  
    86  	t = r0 & k[32]
    87  	r1 ^= (t << 1) | (t >> (32 - 1))
    88  	r2 ^= r3 | k[35]
    89  	r0 ^= r1 | k[33]
    90  	t = r2 & k[34]
    91  	r3 ^= (t << 1) | (t >> (32 - 1))
    92  
    93  	f(&r0, &r1, &r2, &r3, k[36], k[37])
    94  	f(&r2, &r3, &r0, &r1, k[38], k[39])
    95  	f(&r0, &r1, &r2, &r3, k[40], k[41])
    96  	f(&r2, &r3, &r0, &r1, k[42], k[43])
    97  	f(&r0, &r1, &r2, &r3, k[44], k[45])
    98  	f(&r2, &r3, &r0, &r1, k[46], k[47])
    99  
   100  	r2 ^= k[48]
   101  	r3 ^= k[49]
   102  	r0 ^= k[50]
   103  	r1 ^= k[51]
   104  
   105  	dst[0] = byte(r2 >> 24)
   106  	dst[1] = byte(r2 >> 16)
   107  	dst[2] = byte(r2 >> 8)
   108  	dst[3] = byte(r2)
   109  	dst[4] = byte(r3 >> 24)
   110  	dst[5] = byte(r3 >> 16)
   111  	dst[6] = byte(r3 >> 8)
   112  	dst[7] = byte(r3)
   113  	dst[8] = byte(r0 >> 24)
   114  	dst[9] = byte(r0 >> 16)
   115  	dst[10] = byte(r0 >> 8)
   116  	dst[11] = byte(r0)
   117  	dst[12] = byte(r1 >> 24)
   118  	dst[13] = byte(r1 >> 16)
   119  	dst[14] = byte(r1 >> 8)
   120  	dst[15] = byte(r1)
   121  }
   122  
   123  func (c *blockCipher128) Decrypt(dst, src []byte) {
   124  	if len(src) < BlockSize {
   125  		panic("camellia: src buffer to small")
   126  	}
   127  	if len(dst) < BlockSize {
   128  		panic("camellia: dst buffer to small")
   129  	}
   130  
   131  	r0 := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
   132  	r1 := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
   133  	r2 := uint32(src[8])<<24 | uint32(src[9])<<16 | uint32(src[10])<<8 | uint32(src[11])
   134  	r3 := uint32(src[12])<<24 | uint32(src[13])<<16 | uint32(src[14])<<8 | uint32(src[15])
   135  
   136  	k := &(c.sk)
   137  
   138  	r3 ^= k[51]
   139  	r2 ^= k[50]
   140  	r1 ^= k[49]
   141  	r0 ^= k[48]
   142  
   143  	f(&r0, &r1, &r2, &r3, k[46], k[47])
   144  	f(&r2, &r3, &r0, &r1, k[44], k[45])
   145  	f(&r0, &r1, &r2, &r3, k[42], k[43])
   146  	f(&r2, &r3, &r0, &r1, k[40], k[41])
   147  	f(&r0, &r1, &r2, &r3, k[38], k[39])
   148  	f(&r2, &r3, &r0, &r1, k[36], k[37])
   149  
   150  	t := r0 & k[34]
   151  	r1 ^= (t << 1) | (t >> (32 - 1))
   152  	r2 ^= r3 | k[33]
   153  	r0 ^= r1 | k[35]
   154  	t = r2 & k[32]
   155  	r3 ^= (t << 1) | (t >> (32 - 1))
   156  
   157  	f(&r0, &r1, &r2, &r3, k[30], k[31])
   158  	f(&r2, &r3, &r0, &r1, k[28], k[29])
   159  	f(&r0, &r1, &r2, &r3, k[26], k[27])
   160  	f(&r2, &r3, &r0, &r1, k[24], k[25])
   161  	f(&r0, &r1, &r2, &r3, k[22], k[23])
   162  	f(&r2, &r3, &r0, &r1, k[20], k[21])
   163  
   164  	t = r0 & k[18]
   165  	r1 ^= (t << 1) | (t >> (32 - 1))
   166  	r2 ^= r3 | k[17]
   167  	r0 ^= r1 | k[19]
   168  	t = r2 & k[16]
   169  	r3 ^= (t << 1) | (t >> (32 - 1))
   170  
   171  	f(&r0, &r1, &r2, &r3, k[14], k[15])
   172  	f(&r2, &r3, &r0, &r1, k[12], k[13])
   173  	f(&r0, &r1, &r2, &r3, k[10], k[11])
   174  	f(&r2, &r3, &r0, &r1, k[8], k[9])
   175  	f(&r0, &r1, &r2, &r3, k[6], k[7])
   176  	f(&r2, &r3, &r0, &r1, k[4], k[5])
   177  
   178  	r1 ^= k[3]
   179  	r0 ^= k[2]
   180  	r3 ^= k[1]
   181  	r2 ^= k[0]
   182  
   183  	dst[0] = byte(r2 >> 24)
   184  	dst[1] = byte(r2 >> 16)
   185  	dst[2] = byte(r2 >> 8)
   186  	dst[3] = byte(r2)
   187  	dst[4] = byte(r3 >> 24)
   188  	dst[5] = byte(r3 >> 16)
   189  	dst[6] = byte(r3 >> 8)
   190  	dst[7] = byte(r3)
   191  	dst[8] = byte(r0 >> 24)
   192  	dst[9] = byte(r0 >> 16)
   193  	dst[10] = byte(r0 >> 8)
   194  	dst[11] = byte(r0)
   195  	dst[12] = byte(r1 >> 24)
   196  	dst[13] = byte(r1 >> 16)
   197  	dst[14] = byte(r1 >> 8)
   198  	dst[15] = byte(r1)
   199  }
   200  
// blockCipher256 is the camellia cipher for 192 or 256 bit keys.
type blockCipher256 struct {
	sk [68]uint32 // The 68 32-bit subkeys, filled in by keySchedule.
}
   205  
   206  func (c *blockCipher256) BlockSize() int { return BlockSize }
   207  
   208  func (c *blockCipher256) Encrypt(dst, src []byte) {
   209  	if len(src) < BlockSize {
   210  		panic("camellia: src buffer to small")
   211  	}
   212  	if len(dst) < BlockSize {
   213  		panic("camellia: dst buffer to small")
   214  	}
   215  
   216  	r0 := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
   217  	r1 := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
   218  	r2 := uint32(src[8])<<24 | uint32(src[9])<<16 | uint32(src[10])<<8 | uint32(src[11])
   219  	r3 := uint32(src[12])<<24 | uint32(src[13])<<16 | uint32(src[14])<<8 | uint32(src[15])
   220  
   221  	k := &(c.sk)
   222  
   223  	r0 ^= k[0]
   224  	r1 ^= k[1]
   225  	r2 ^= k[2]
   226  	r3 ^= k[3]
   227  
   228  	f(&r0, &r1, &r2, &r3, k[4], k[5])
   229  	f(&r2, &r3, &r0, &r1, k[6], k[7])
   230  	f(&r0, &r1, &r2, &r3, k[8], k[9])
   231  	f(&r2, &r3, &r0, &r1, k[10], k[11])
   232  	f(&r0, &r1, &r2, &r3, k[12], k[13])
   233  	f(&r2, &r3, &r0, &r1, k[14], k[15])
   234  
   235  	t := r0 & k[16]
   236  	r1 ^= (t << 1) | (t >> (32 - 1))
   237  	r2 ^= r3 | k[19]
   238  	r0 ^= r1 | k[17]
   239  	t = r2 & k[18]
   240  	r3 ^= (t << 1) | (t >> (32 - 1))
   241  
   242  	f(&r0, &r1, &r2, &r3, k[20], k[21])
   243  	f(&r2, &r3, &r0, &r1, k[22], k[23])
   244  	f(&r0, &r1, &r2, &r3, k[24], k[25])
   245  	f(&r2, &r3, &r0, &r1, k[26], k[27])
   246  	f(&r0, &r1, &r2, &r3, k[28], k[29])
   247  	f(&r2, &r3, &r0, &r1, k[30], k[31])
   248  
   249  	t = r0 & k[32]
   250  	r1 ^= (t << 1) | (t >> (32 - 1))
   251  	r2 ^= r3 | k[35]
   252  	r0 ^= r1 | k[33]
   253  	t = r2 & k[34]
   254  	r3 ^= (t << 1) | (t >> (32 - 1))
   255  
   256  	f(&r0, &r1, &r2, &r3, k[36], k[37])
   257  	f(&r2, &r3, &r0, &r1, k[38], k[39])
   258  	f(&r0, &r1, &r2, &r3, k[40], k[41])
   259  	f(&r2, &r3, &r0, &r1, k[42], k[43])
   260  	f(&r0, &r1, &r2, &r3, k[44], k[45])
   261  	f(&r2, &r3, &r0, &r1, k[46], k[47])
   262  
   263  	t = r0 & k[48]
   264  	r1 ^= (t << 1) | (t >> (32 - 1))
   265  	r2 ^= r3 | k[51]
   266  	r0 ^= r1 | k[49]
   267  	t = r2 & k[50]
   268  	r3 ^= (t << 1) | (t >> (32 - 1))
   269  
   270  	f(&r0, &r1, &r2, &r3, k[52], k[53])
   271  	f(&r2, &r3, &r0, &r1, k[54], k[55])
   272  	f(&r0, &r1, &r2, &r3, k[56], k[57])
   273  	f(&r2, &r3, &r0, &r1, k[58], k[59])
   274  	f(&r0, &r1, &r2, &r3, k[60], k[61])
   275  	f(&r2, &r3, &r0, &r1, k[62], k[63])
   276  
   277  	r2 ^= c.sk[64]
   278  	r3 ^= c.sk[65]
   279  	r0 ^= c.sk[66]
   280  	r1 ^= c.sk[67]
   281  
   282  	dst[0] = byte(r2 >> 24)
   283  	dst[1] = byte(r2 >> 16)
   284  	dst[2] = byte(r2 >> 8)
   285  	dst[3] = byte(r2)
   286  	dst[4] = byte(r3 >> 24)
   287  	dst[5] = byte(r3 >> 16)
   288  	dst[6] = byte(r3 >> 8)
   289  	dst[7] = byte(r3)
   290  	dst[8] = byte(r0 >> 24)
   291  	dst[9] = byte(r0 >> 16)
   292  	dst[10] = byte(r0 >> 8)
   293  	dst[11] = byte(r0)
   294  	dst[12] = byte(r1 >> 24)
   295  	dst[13] = byte(r1 >> 16)
   296  	dst[14] = byte(r1 >> 8)
   297  	dst[15] = byte(r1)
   298  }
   299  
   300  func (c *blockCipher256) Decrypt(dst, src []byte) {
   301  	if len(src) < BlockSize {
   302  		panic("camellia: src buffer to small")
   303  	}
   304  	if len(dst) < BlockSize {
   305  		panic("camellia: dst buffer to small")
   306  	}
   307  
   308  	r0 := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
   309  	r1 := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
   310  	r2 := uint32(src[8])<<24 | uint32(src[9])<<16 | uint32(src[10])<<8 | uint32(src[11])
   311  	r3 := uint32(src[12])<<24 | uint32(src[13])<<16 | uint32(src[14])<<8 | uint32(src[15])
   312  
   313  	k := &(c.sk)
   314  
   315  	r3 ^= k[67]
   316  	r2 ^= k[66]
   317  	r1 ^= k[65]
   318  	r0 ^= k[64]
   319  
   320  	f(&r0, &r1, &r2, &r3, k[62], k[63])
   321  	f(&r2, &r3, &r0, &r1, k[60], k[61])
   322  	f(&r0, &r1, &r2, &r3, k[58], k[59])
   323  	f(&r2, &r3, &r0, &r1, k[56], k[57])
   324  	f(&r0, &r1, &r2, &r3, k[54], k[55])
   325  	f(&r2, &r3, &r0, &r1, k[52], k[53])
   326  
   327  	t := r0 & k[50]
   328  	r1 ^= (t << 1) | (t >> (32 - 1))
   329  	r2 ^= r3 | k[49]
   330  	r0 ^= r1 | k[51]
   331  	t = r2 & k[48]
   332  	r3 ^= (t << 1) | (t >> (32 - 1))
   333  
   334  	f(&r0, &r1, &r2, &r3, k[46], k[47])
   335  	f(&r2, &r3, &r0, &r1, k[44], k[45])
   336  	f(&r0, &r1, &r2, &r3, k[42], k[43])
   337  	f(&r2, &r3, &r0, &r1, k[40], k[41])
   338  	f(&r0, &r1, &r2, &r3, k[38], k[39])
   339  	f(&r2, &r3, &r0, &r1, k[36], k[37])
   340  
   341  	t = r0 & k[34]
   342  	r1 ^= (t << 1) | (t >> (32 - 1))
   343  	r2 ^= r3 | k[33]
   344  	r0 ^= r1 | k[35]
   345  	t = r2 & k[32]
   346  	r3 ^= (t << 1) | (t >> (32 - 1))
   347  
   348  	f(&r0, &r1, &r2, &r3, k[30], k[31])
   349  	f(&r2, &r3, &r0, &r1, k[28], k[29])
   350  	f(&r0, &r1, &r2, &r3, k[26], k[27])
   351  	f(&r2, &r3, &r0, &r1, k[24], k[25])
   352  	f(&r0, &r1, &r2, &r3, k[22], k[23])
   353  	f(&r2, &r3, &r0, &r1, k[20], k[21])
   354  
   355  	t = r0 & k[18]
   356  	r1 ^= (t << 1) | (t >> (32 - 1))
   357  	r2 ^= r3 | k[17]
   358  	r0 ^= r1 | k[19]
   359  	t = r2 & k[16]
   360  	r3 ^= (t << 1) | (t >> (32 - 1))
   361  
   362  	f(&r0, &r1, &r2, &r3, k[14], k[15])
   363  	f(&r2, &r3, &r0, &r1, k[12], k[13])
   364  	f(&r0, &r1, &r2, &r3, k[10], k[11])
   365  	f(&r2, &r3, &r0, &r1, k[8], k[9])
   366  	f(&r0, &r1, &r2, &r3, k[6], k[7])
   367  	f(&r2, &r3, &r0, &r1, k[4], k[5])
   368  
   369  	r1 ^= k[3]
   370  	r0 ^= k[2]
   371  	r3 ^= k[1]
   372  	r2 ^= k[0]
   373  
   374  	dst[0] = byte(r2 >> 24)
   375  	dst[1] = byte(r2 >> 16)
   376  	dst[2] = byte(r2 >> 8)
   377  	dst[3] = byte(r2)
   378  	dst[4] = byte(r3 >> 24)
   379  	dst[5] = byte(r3 >> 16)
   380  	dst[6] = byte(r3 >> 8)
   381  	dst[7] = byte(r3)
   382  	dst[8] = byte(r0 >> 24)
   383  	dst[9] = byte(r0 >> 16)
   384  	dst[10] = byte(r0 >> 8)
   385  	dst[11] = byte(r0)
   386  	dst[12] = byte(r1 >> 24)
   387  	dst[13] = byte(r1 >> 16)
   388  	dst[14] = byte(r1 >> 8)
   389  	dst[15] = byte(r1)
   390  }
   391  
   392  // The camellia key schedule for 128 bit keys.
   393  func (c *blockCipher128) keySchedule(key []byte) {
   394  	r0 := uint32(key[0])<<24 | uint32(key[1])<<16 | uint32(key[2])<<8 | uint32(key[3])
   395  	r1 := uint32(key[4])<<24 | uint32(key[5])<<16 | uint32(key[6])<<8 | uint32(key[7])
   396  	r2 := uint32(key[8])<<24 | uint32(key[9])<<16 | uint32(key[10])<<8 | uint32(key[11])
   397  	r3 := uint32(key[12])<<24 | uint32(key[13])<<16 | uint32(key[14])<<8 | uint32(key[15])
   398  
   399  	k := &(c.sk)
   400  
   401  	k[0], k[1], k[2], k[3] = r0, r1, r2, r3
   402  
   403  	f(&r0, &r1, &r2, &r3, sigma[0], sigma[1])
   404  	f(&r2, &r3, &r0, &r1, sigma[2], sigma[3])
   405  
   406  	r0 ^= k[0]
   407  	r1 ^= k[1]
   408  	r2 ^= k[2]
   409  	r3 ^= k[3]
   410  	f(&r0, &r1, &r2, &r3, sigma[4], sigma[5])
   411  	f(&r2, &r3, &r0, &r1, sigma[6], sigma[7])
   412  
   413  	k[4], k[5], k[6], k[7] = r0, r1, r2, r3
   414  	rotl128(&r0, &r1, &r2, &r3, 15) // KA <<< 15
   415  	k[12], k[13], k[14], k[15] = r0, r1, r2, r3
   416  	rotl128(&r0, &r1, &r2, &r3, 15) // KA <<< 30
   417  	k[16], k[17], k[18], k[19] = r0, r1, r2, r3
   418  	rotl128(&r0, &r1, &r2, &r3, 15) // KA <<< 45
   419  	k[24] = r0
   420  	k[25] = r1
   421  	rotl128(&r0, &r1, &r2, &r3, 15) // KA <<< 60
   422  	k[28], k[29], k[30], k[31] = r0, r1, r2, r3
   423  	rotl128(&r1, &r2, &r3, &r0, 2) // KA <<< 94
   424  	k[40], k[41], k[42], k[43] = r1, r2, r3, r0
   425  	rotl128(&r1, &r2, &r3, &r0, 17) // KA <<<111
   426  	k[48], k[49], k[50], k[51] = r1, r2, r3, r0
   427  
   428  	r0, r1, r2, r3 = k[0], k[1], k[2], k[3]
   429  	rotl128(&r0, &r1, &r2, &r3, 15) // KL <<< 15
   430  	k[8], k[9], k[10], k[11] = r0, r1, r2, r3
   431  	rotl128(&r0, &r1, &r2, &r3, 30) // KL <<< 45
   432  	k[20], k[21], k[22], k[23] = r0, r1, r2, r3
   433  	rotl128(&r0, &r1, &r2, &r3, 15) // KL <<< 60
   434  	k[26] = r2
   435  	k[27] = r3
   436  	rotl128(&r0, &r1, &r2, &r3, 17) // KL <<< 77
   437  	k[32], k[33], k[34], k[35] = r0, r1, r2, r3
   438  	rotl128(&r0, &r1, &r2, &r3, 17) // KL <<< 94
   439  	k[36], k[37], k[38], k[39] = r0, r1, r2, r3
   440  	rotl128(&r0, &r1, &r2, &r3, 17) // KL <<<111
   441  	k[44], k[45], k[46], k[47] = r0, r1, r2, r3
   442  }
   443  
   444  // The camellia key schedule for 192 or 256 bit keys.
   445  func (c *blockCipher256) keySchedule(key []byte) {
   446  	k := &(c.sk)
   447  	k[0] = uint32(key[0])<<24 | uint32(key[1])<<16 | uint32(key[2])<<8 | uint32(key[3])
   448  	k[1] = uint32(key[4])<<24 | uint32(key[5])<<16 | uint32(key[6])<<8 | uint32(key[7])
   449  	k[2] = uint32(key[8])<<24 | uint32(key[9])<<16 | uint32(key[10])<<8 | uint32(key[11])
   450  	k[3] = uint32(key[12])<<24 | uint32(key[13])<<16 | uint32(key[14])<<8 | uint32(key[15])
   451  
   452  	k[8] = uint32(key[16])<<24 | uint32(key[17])<<16 | uint32(key[18])<<8 | uint32(key[19])
   453  	k[9] = uint32(key[20])<<24 | uint32(key[21])<<16 | uint32(key[22])<<8 | uint32(key[23])
   454  	if len(key) == 24 {
   455  		k[10] = ^k[8]
   456  		k[11] = ^k[9]
   457  	} else {
   458  		k[10] = uint32(key[24])<<24 | uint32(key[25])<<16 | uint32(key[26])<<8 | uint32(key[27])
   459  		k[11] = uint32(key[28])<<24 | uint32(key[29])<<16 | uint32(key[30])<<8 | uint32(key[31])
   460  	}
   461  
   462  	s0 := k[8] ^ k[0]
   463  	s1 := k[9] ^ k[1]
   464  	s2 := k[10] ^ k[2]
   465  	s3 := k[11] ^ k[3]
   466  
   467  	f(&s0, &s1, &s2, &s3, sigma[0], sigma[1])
   468  	f(&s2, &s3, &s0, &s1, sigma[2], sigma[3])
   469  
   470  	s0 ^= k[0]
   471  	s1 ^= k[1]
   472  	s2 ^= k[2]
   473  	s3 ^= k[3]
   474  	f(&s0, &s1, &s2, &s3, sigma[4], sigma[5])
   475  	f(&s2, &s3, &s0, &s1, sigma[6], sigma[7])
   476  
   477  	k[12], k[13], k[14], k[15] = s0, s1, s2, s3
   478  	s0 ^= k[8]
   479  	s1 ^= k[9]
   480  	s2 ^= k[10]
   481  	s3 ^= k[11]
   482  	f(&s0, &s1, &s2, &s3, sigma[8], sigma[9])
   483  	f(&s2, &s3, &s0, &s1, sigma[10], sigma[11])
   484  
   485  	k[4], k[5], k[6], k[7] = s0, s1, s2, s3
   486  	rotl128(&s0, &s1, &s2, &s3, 30) // KB <<< 30
   487  	k[20], k[21], k[22], k[23] = s0, s1, s2, s3
   488  	rotl128(&s0, &s1, &s2, &s3, 30) // KB <<< 60
   489  	k[40], k[41], k[42], k[43] = s0, s1, s2, s3
   490  	rotl128(&s1, &s2, &s3, &s0, 19) // KB <<<111
   491  	k[64], k[65], k[66], k[67] = s1, s2, s3, s0
   492  
   493  	s0, s1, s2, s3 = k[8], k[9], k[10], k[11]
   494  	rotl128(&s0, &s1, &s2, &s3, 15) // KR <<< 15
   495  	k[8], k[9], k[10], k[11] = s0, s1, s2, s3
   496  	rotl128(&s0, &s1, &s2, &s3, 15) // KR <<< 30
   497  	k[16], k[17], k[18], k[19] = s0, s1, s2, s3
   498  	rotl128(&s0, &s1, &s2, &s3, 30) // KR <<< 60
   499  	k[36], k[37], k[38], k[39] = s0, s1, s2, s3
   500  	rotl128(&s1, &s2, &s3, &s0, 2) // KR <<< 94
   501  	k[52], k[53], k[54], k[55] = s1, s2, s3, s0
   502  
   503  	s0, s1, s2, s3 = k[12], k[13], k[14], k[15]
   504  	rotl128(&s0, &s1, &s2, &s3, 15) // KA <<< 15
   505  	k[12], k[13], k[14], k[15] = s0, s1, s2, s3
   506  	rotl128(&s0, &s1, &s2, &s3, 30) // KA <<< 45
   507  	k[28], k[29], k[30], k[31] = s0, s1, s2, s3
   508  	// KA <<< 77
   509  	k[48], k[49], k[50], k[51] = s1, s2, s3, s0
   510  	rotl128(&s1, &s2, &s3, &s0, 17) // KA <<< 94
   511  	k[56], k[57], k[58], k[59] = s1, s2, s3, s0
   512  
   513  	s0, s1, s2, s3 = k[0], k[1], k[2], k[3]
   514  	rotl128(&s1, &s2, &s3, &s0, 13) // KL <<< 45
   515  	k[24], k[25], k[26], k[27] = s1, s2, s3, s0
   516  	rotl128(&s1, &s2, &s3, &s0, 15) // KL <<< 60
   517  	k[32], k[33], k[34], k[35] = s1, s2, s3, s0
   518  	rotl128(&s1, &s2, &s3, &s0, 17) // KL <<< 77
   519  	k[44], k[45], k[46], k[47] = s1, s2, s3, s0
   520  	rotl128(&s2, &s3, &s0, &s1, 2) // KL <<<111
   521  	k[60], k[61], k[62], k[63] = s2, s3, s0, s1
   522  }