git.sr.ht/~pingoo/stdx@v0.0.0-20240218134121-094174641f6e/crypto/internal/blake3/compress_generic.go (about)

     1  package blake3
     2  
     3  import (
     4  	"bytes"
     5  	"math/bits"
     6  )
     7  
// compressNodeGeneric is the portable (pure Go) implementation of the BLAKE3
// compression function. It mixes the node's chaining value, counter, block
// length, flags and 16 message words through seven rounds and writes the 16
// resulting words to *out.
//
// The 16-word working state is kept in the locals s0..s15 rather than in an
// array. It starts out as:
//
//	s0..s7   = n.cv[0..7]
//	s8..s11  = iv[0..3]
//	s12, s13 = low and high 32 bits of n.counter
//	s14      = n.blockLen
//	s15      = n.flags
//
// n is taken by value and is not modified; out is fully overwritten.
func compressNodeGeneric(out *[16]uint32, n node) {
	// g is the quarter-round mixing step: it updates four state words using
	// two message words (mx, my) and returns the new values. RotateLeft32
	// with a negative count rotates right, so the rotations below are right
	// rotations by 16, 12, 8 and 7 bits.
	g := func(a, b, c, d, mx, my uint32) (uint32, uint32, uint32, uint32) {
		a += b + mx
		d = bits.RotateLeft32(d^a, -16)
		c += d
		b = bits.RotateLeft32(b^c, -12)
		a += b + my
		d = bits.RotateLeft32(d^a, -8)
		c += d
		b = bits.RotateLeft32(b^c, -7)
		return a, b, c, d
	}

	// NOTE: we unroll all of the rounds, as well as the permutations that occur
	// between rounds. As a result the message words are never physically
	// reordered; each round simply reads n.block with that round's indices.

	// round 1 (also initializes state)
	// columns: each call mixes one column (s[i], s[i+4], s[i+8], s[i+12]).
	s0, s4, s8, s12 := g(n.cv[0], n.cv[4], iv[0], uint32(n.counter), n.block[0], n.block[1])
	s1, s5, s9, s13 := g(n.cv[1], n.cv[5], iv[1], uint32(n.counter>>32), n.block[2], n.block[3])
	s2, s6, s10, s14 := g(n.cv[2], n.cv[6], iv[2], n.blockLen, n.block[4], n.block[5])
	s3, s7, s11, s15 := g(n.cv[3], n.cv[7], iv[3], n.flags, n.block[6], n.block[7])
	// diagonals: each call mixes one diagonal of the 4x4 state.
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[8], n.block[9])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[10], n.block[11])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[12], n.block[13])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[14], n.block[15])

	// Rounds 2-7 repeat the same four column calls followed by four diagonal
	// calls; only the message word indices change.

	// round 2
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[2], n.block[6])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[3], n.block[10])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[7], n.block[0])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[4], n.block[13])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[1], n.block[11])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[12], n.block[5])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[9], n.block[14])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[15], n.block[8])

	// round 3
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[3], n.block[4])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[10], n.block[12])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[13], n.block[2])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[7], n.block[14])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[6], n.block[5])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[9], n.block[0])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[11], n.block[15])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[8], n.block[1])

	// round 4
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[10], n.block[7])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[12], n.block[9])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[14], n.block[3])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[13], n.block[15])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[4], n.block[0])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[11], n.block[2])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[5], n.block[8])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[1], n.block[6])

	// round 5
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[12], n.block[13])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[9], n.block[11])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[15], n.block[10])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[14], n.block[8])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[7], n.block[2])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[5], n.block[3])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[0], n.block[1])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[6], n.block[4])

	// round 6
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[9], n.block[14])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[11], n.block[5])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[8], n.block[12])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[15], n.block[1])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[13], n.block[3])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[0], n.block[10])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[2], n.block[6])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[4], n.block[7])

	// round 7
	s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[11], n.block[15])
	s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[5], n.block[0])
	s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[1], n.block[9])
	s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[8], n.block[6])
	s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[14], n.block[10])
	s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[2], n.block[12])
	s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[3], n.block[4])
	s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[7], n.block[13])

	// finalization: the first eight output words fold the upper half of the
	// state into the lower half (s[i] ^ s[i+8]); the last eight feed the
	// input chaining value forward into the upper half (s[i+8] ^ n.cv[i]).
	*out = [16]uint32{
		s0 ^ s8, s1 ^ s9, s2 ^ s10, s3 ^ s11,
		s4 ^ s12, s5 ^ s13, s6 ^ s14, s7 ^ s15,
		s8 ^ n.cv[0], s9 ^ n.cv[1], s10 ^ n.cv[2], s11 ^ n.cv[3],
		s12 ^ n.cv[4], s13 ^ n.cv[5], s14 ^ n.cv[6], s15 ^ n.cv[7],
	}
}
   104  
   105  func chainingValue(n node) (cv [8]uint32) {
   106  	full := compressNode(n)
   107  	copy(cv[:], full[:])
   108  	return
   109  }
   110  
   111  func compressBufferGeneric(buf *[maxSIMD * chunkSize]byte, buflen int, key *[8]uint32, counter uint64, flags uint32) (n node) {
   112  	if buflen <= chunkSize {
   113  		return compressChunk(buf[:buflen], key, counter, flags)
   114  	}
   115  	var cvs [maxSIMD][8]uint32
   116  	var numCVs uint64
   117  	for bb := bytes.NewBuffer(buf[:buflen]); bb.Len() > 0; numCVs++ {
   118  		cvs[numCVs] = chainingValue(compressChunk(bb.Next(chunkSize), key, counter+numCVs, flags))
   119  	}
   120  	return mergeSubtrees(&cvs, numCVs, key, flags)
   121  }
   122  
   123  func compressBlocksGeneric(outs *[maxSIMD][64]byte, n node) {
   124  	for i := range outs {
   125  		wordsToBytes(compressNode(n), &outs[i])
   126  		n.counter++
   127  	}
   128  }
   129  
   130  func mergeSubtreesGeneric(cvs *[maxSIMD][8]uint32, numCVs uint64, key *[8]uint32, flags uint32) node {
   131  	for numCVs > 2 {
   132  		rem := numCVs / 2
   133  		for i := range cvs[:rem] {
   134  			cvs[i] = chainingValue(parentNode(cvs[i*2], cvs[i*2+1], *key, flags))
   135  		}
   136  		if numCVs%2 != 0 {
   137  			cvs[rem] = cvs[rem*2]
   138  			rem++
   139  		}
   140  		numCVs = rem
   141  	}
   142  	return parentNode(cvs[0], cvs[1], *key, flags)
   143  }