git.sr.ht/~pingoo/stdx@v0.0.0-20240218134121-094174641f6e/crypto/internal/blake3/compress_generic.go (about) 1 package blake3 2 3 import ( 4 "bytes" 5 "math/bits" 6 ) 7 8 func compressNodeGeneric(out *[16]uint32, n node) { 9 g := func(a, b, c, d, mx, my uint32) (uint32, uint32, uint32, uint32) { 10 a += b + mx 11 d = bits.RotateLeft32(d^a, -16) 12 c += d 13 b = bits.RotateLeft32(b^c, -12) 14 a += b + my 15 d = bits.RotateLeft32(d^a, -8) 16 c += d 17 b = bits.RotateLeft32(b^c, -7) 18 return a, b, c, d 19 } 20 21 // NOTE: we unroll all of the rounds, as well as the permutations that occur 22 // between rounds. 23 24 // round 1 (also initializes state) 25 // columns 26 s0, s4, s8, s12 := g(n.cv[0], n.cv[4], iv[0], uint32(n.counter), n.block[0], n.block[1]) 27 s1, s5, s9, s13 := g(n.cv[1], n.cv[5], iv[1], uint32(n.counter>>32), n.block[2], n.block[3]) 28 s2, s6, s10, s14 := g(n.cv[2], n.cv[6], iv[2], n.blockLen, n.block[4], n.block[5]) 29 s3, s7, s11, s15 := g(n.cv[3], n.cv[7], iv[3], n.flags, n.block[6], n.block[7]) 30 // diagonals 31 s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[8], n.block[9]) 32 s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[10], n.block[11]) 33 s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[12], n.block[13]) 34 s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[14], n.block[15]) 35 36 // round 2 37 s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[2], n.block[6]) 38 s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[3], n.block[10]) 39 s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[7], n.block[0]) 40 s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[4], n.block[13]) 41 s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[1], n.block[11]) 42 s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[12], n.block[5]) 43 s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[9], n.block[14]) 44 s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[15], n.block[8]) 45 46 // round 3 47 s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[3], n.block[4]) 48 s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[10], n.block[12]) 49 s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[13], n.block[2]) 50 s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[7], n.block[14]) 51 s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[6], n.block[5]) 52 s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[9], n.block[0]) 53 s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[11], n.block[15]) 54 s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[8], n.block[1]) 55 56 // round 4 57 s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[10], n.block[7]) 58 s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[12], n.block[9]) 59 s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[14], n.block[3]) 60 s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[13], n.block[15]) 61 s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[4], n.block[0]) 62 s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[11], n.block[2]) 63 s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[5], n.block[8]) 64 s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[1], n.block[6]) 65 66 // round 5 67 s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[12], n.block[13]) 68 s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[9], n.block[11]) 69 s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[15], n.block[10]) 70 s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[14], n.block[8]) 71 s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[7], n.block[2]) 72 s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[5], n.block[3]) 73 s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[0], n.block[1]) 74 s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[6], n.block[4]) 75 76 // round 6 77 s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[9], n.block[14]) 78 s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[11], n.block[5]) 79 s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[8], n.block[12]) 80 s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[15], n.block[1]) 81 s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[13], n.block[3]) 82 s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[0], n.block[10]) 83 s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[2], n.block[6]) 84 s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[4], n.block[7]) 85 86 // round 7 87 s0, s4, s8, s12 = g(s0, s4, s8, s12, n.block[11], n.block[15]) 88 s1, s5, s9, s13 = g(s1, s5, s9, s13, n.block[5], n.block[0]) 89 s2, s6, s10, s14 = g(s2, s6, s10, s14, n.block[1], n.block[9]) 90 s3, s7, s11, s15 = g(s3, s7, s11, s15, n.block[8], n.block[6]) 91 s0, s5, s10, s15 = g(s0, s5, s10, s15, n.block[14], n.block[10]) 92 s1, s6, s11, s12 = g(s1, s6, s11, s12, n.block[2], n.block[12]) 93 s2, s7, s8, s13 = g(s2, s7, s8, s13, n.block[3], n.block[4]) 94 s3, s4, s9, s14 = g(s3, s4, s9, s14, n.block[7], n.block[13]) 95 96 // finalization 97 *out = [16]uint32{ 98 s0 ^ s8, s1 ^ s9, s2 ^ s10, s3 ^ s11, 99 s4 ^ s12, s5 ^ s13, s6 ^ s14, s7 ^ s15, 100 s8 ^ n.cv[0], s9 ^ n.cv[1], s10 ^ n.cv[2], s11 ^ n.cv[3], 101 s12 ^ n.cv[4], s13 ^ n.cv[5], s14 ^ n.cv[6], s15 ^ n.cv[7], 102 } 103 } 104 105 func chainingValue(n node) (cv [8]uint32) { 106 full := compressNode(n) 107 copy(cv[:], full[:]) 108 return 109 } 110 111 func compressBufferGeneric(buf *[maxSIMD * chunkSize]byte, buflen int, key *[8]uint32, counter uint64, flags uint32) (n node) { 112 if buflen <= chunkSize { 113 return compressChunk(buf[:buflen], key, counter, flags) 114 } 115 var cvs [maxSIMD][8]uint32 116 var numCVs uint64 117 for bb := bytes.NewBuffer(buf[:buflen]); bb.Len() > 0; numCVs++ { 118 cvs[numCVs] = chainingValue(compressChunk(bb.Next(chunkSize), key, counter+numCVs, flags)) 119 } 120 return mergeSubtrees(&cvs, numCVs, key, flags) 121 } 122 123 func compressBlocksGeneric(outs *[maxSIMD][64]byte, n node) { 124 for i := range outs { 125 wordsToBytes(compressNode(n), &outs[i]) 126 n.counter++ 127 } 128 } 129 130 func mergeSubtreesGeneric(cvs *[maxSIMD][8]uint32, numCVs uint64, key *[8]uint32, flags uint32) node { 131 for numCVs > 2 { 132 rem := numCVs / 2 133 for i := range cvs[:rem] { 134 cvs[i] = chainingValue(parentNode(cvs[i*2], cvs[i*2+1], *key, flags)) 135 } 136 if numCVs%2 != 0 { 137 cvs[rem] = cvs[rem*2] 138 rem++ 139 } 140 numCVs = rem 141 } 142 return parentNode(cvs[0], cvs[1], *key, flags) 143 }