github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/message/varint_test.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package message 16 17 import ( 18 "encoding/binary" 19 "fmt" 20 "math" 21 "math/rand" 22 "testing" 23 24 "github.com/stretchr/testify/assert" 25 ) 26 27 var testRand = rand.New(rand.NewSource(1)) 28 29 func TestVarint(t *testing.T) { 30 t.Run("min delta varint", func(t *testing.T) { 31 testRoundTripVarints(t, minDeltaCodec{}) 32 }) 33 t.Run("mean delta varint", func(t *testing.T) { 34 testRoundTripVarints(t, meanDeltaCodec{}) 35 }) 36 t.Run("direct varint", func(t *testing.T) { 37 testRoundTripVarints(t, directCodec{}) 38 }) 39 t.Run("signed delta varint", func(t *testing.T) { 40 testRoundTripVarints(t, signedDeltaCodec{}) 41 }) 42 } 43 44 func BenchmarkVarint(b *testing.B) { 45 b.Run("signed delta varint", func(b *testing.B) { 46 benchmarkVarintCodec(b, signedDeltaCodec{}) 47 }) 48 b.Run("min delta varint", func(b *testing.B) { 49 benchmarkVarintCodec(b, minDeltaCodec{}) 50 }) 51 b.Run("mean delta varint", func(b *testing.B) { 52 benchmarkVarintCodec(b, meanDeltaCodec{}) 53 }) 54 b.Run("direct varint", func(tb *testing.B) { 55 benchmarkVarintCodec(b, directCodec{}) 56 }) 57 58 } 59 60 type codec interface { 61 encode(ints []uint64, buf []byte) []byte 62 decode(buf []byte, ints []uint64) []uint64 63 maxSize(n int) int 64 } 65 66 type minDeltaCodec struct{} 67 68 func (d minDeltaCodec) encode(ints []uint64, buf []byte) []byte { 69 return encodeMinDeltas(ints, buf) 70 } 71 72 func (d minDeltaCodec) decode(buf []byte, ints []uint64) []uint64 { 73 return decodeMinDeltas(buf, ints) 74 } 75 76 func (d minDeltaCodec) maxSize(n int) int { 77 return maxEncodedSize(n) 78 } 79 80 type meanDeltaCodec struct{} 81 82 func (d meanDeltaCodec) encode(ints []uint64, buf []byte) []byte { 83 return encodeMeanDeltas(ints, buf) 84 } 85 86 func (d meanDeltaCodec) decode(buf []byte, ints []uint64) []uint64 { 87 return decodeMeanDeltas(buf, ints) 88 } 89 90 func (d meanDeltaCodec) maxSize(n int) int { 91 return maxEncodedSize(n) 92 } 93 94 type directCodec struct{} 95 96 func (d directCodec) encode(ints []uint64, buf []byte) []byte { 97 return encodeVarintDirect(ints, buf) 98 } 99 100 func (d directCodec) decode(buf []byte, ints []uint64) []uint64 { 101 return decodeVarintDirect(buf, ints) 102 } 103 104 func (d directCodec) maxSize(n int) int { 105 return n * binary.MaxVarintLen64 106 } 107 108 type signedDeltaCodec struct{} 109 110 func (d signedDeltaCodec) encode(ints []uint64, buf []byte) []byte { 111 return endcodeSignedDeltas(ints, buf) 112 } 113 114 func (d signedDeltaCodec) decode(buf []byte, ints []uint64) []uint64 { 115 return decodeSignedDeltas(buf, ints) 116 } 117 118 func (d signedDeltaCodec) maxSize(n int) int { 119 return n * binary.MaxVarintLen64 120 } 121 122 func testRoundTripVarints(t *testing.T, c codec) { 123 for k := 0; k < 1000; k++ { 124 n := testRand.Intn(145) + 5 125 126 counts := make([]uint64, n) 127 sum := uint64(0) 128 for i := range counts { 129 c := testRand.Uint64() % math.MaxUint32 130 counts[i] = c 131 sum += c 132 } 133 assert.Equal(t, sum, sumSubtrees(counts)) 134 135 // round trip the array 136 buf := make([]byte, c.maxSize(len(counts))) 137 buf = c.encode(counts, buf) 138 actual := c.decode(buf, make([]uint64, n)) 139 140 assert.Equal(t, counts, actual) 141 } 142 } 143 144 func benchmarkVarintCodec(b *testing.B, c codec) { 145 k := 150 // branching factor 146 147 b.Run("level 1 subtree counts", func(b *testing.B) { 148 mean := uint64(k) 149 benchmarkVarint(b, mean, mean/4, k, c) 150 }) 151 b.Run("level 2 subtree counts", func(b *testing.B) { 152 mean := uint64(k * k) 153 benchmarkVarint(b, mean, mean/4, k, c) 154 }) 155 b.Run("level 3 subtree counts", func(b *testing.B) { 156 mean := uint64(k * k * k) 157 benchmarkVarint(b, mean, mean/4, k, c) 158 }) 159 b.Run("level 4 subtree counts", func(b *testing.B) { 160 mean := uint64(k * k * k * k) 161 benchmarkVarint(b, mean, mean/4, k, c) 162 }) 163 } 164 165 func benchmarkVarint(b *testing.B, mean, std uint64, k int, c codec) { 166 const n = 1000 167 ints, bufs := makeBenchmarkData(mean, std, k, n, c) 168 169 name := fmt.Sprintf("benchmark encode (mean: %d std: %d)", mean, std) 170 b.Run(name, func(b *testing.B) { 171 buf := make([]byte, c.maxSize(k)) 172 for i := 0; i < b.N; i++ { 173 _ = c.encode(ints[i%n][:], buf) 174 } 175 }) 176 name = fmt.Sprintf("benchmark decode (mean size: %f)", meanSize(bufs)) 177 b.Run(name, func(b *testing.B) { 178 ints := make([]uint64, k) 179 for i := 0; i < b.N; i++ { 180 _ = c.decode(bufs[i%n], ints) 181 } 182 }) 183 } 184 185 func makeBenchmarkData(mean, std uint64, k, n int, c codec) (ints [][]uint64, bufs [][]byte) { 186 ints = make([][]uint64, n) 187 bufs = make([][]byte, n) 188 189 for i := range ints { 190 ints[i] = make([]uint64, k) 191 for j := range ints[i] { 192 ints[i][j] = gaussian(float64(mean), float64(std)) 193 } 194 } 195 for i := range bufs { 196 bufs[i] = make([]byte, c.maxSize(k)) 197 bufs[i] = c.encode(ints[i], bufs[i]) 198 } 199 return 200 } 201 202 func gaussian(mean, std float64) uint64 { 203 return uint64(testRand.NormFloat64()*std + mean) 204 } 205 206 func meanSize(encoded [][]byte) float64 { 207 var sumSz int 208 for i := range encoded { 209 sumSz += len(encoded[i]) 210 } 211 return float64(sumSz) / float64(len(encoded)) 212 } 213 214 func encodeVarintDirect(ints []uint64, buf []byte) []byte { 215 pos := 0 216 for i := range ints { 217 pos += binary.PutUvarint(buf[pos:], ints[i]) 218 } 219 return buf[:pos] 220 } 221 222 func decodeVarintDirect(buf []byte, ints []uint64) []uint64 { 223 for i := range ints { 224 var n int 225 ints[i], n = binary.Uvarint(buf) 226 buf = buf[n:] 227 } 228 assertTrue(len(buf) == 0, "extra bytes after decoding varints") 229 return ints 230 } 231 232 // encodeMinDeltas encodes an unsorted array |ints|. 233 // The encoding format attempts to minimize encoded size by 234 // first finding and encoding the minimum value of |ints| 235 // and then encoding the difference between each value and 236 // that minimum. 237 func encodeMinDeltas(ints []uint64, buf []byte) []byte { 238 min := uint64(math.MaxUint64) 239 for i := range ints { 240 if min > ints[i] { 241 min = ints[i] 242 } 243 } 244 245 pos := 0 246 pos += binary.PutUvarint(buf[pos:], min) 247 248 for _, count := range ints { 249 delta := count - min 250 pos += binary.PutUvarint(buf[pos:], delta) 251 } 252 return buf[:pos] 253 } 254 255 // decodeMinDeltas decodes an array of ints that were 256 // previously encoded with encodeMinDeltas. 257 func decodeMinDeltas(buf []byte, ints []uint64) []uint64 { 258 min, k := binary.Uvarint(buf) 259 buf = buf[k:] 260 for i := range ints { 261 delta, k := binary.Uvarint(buf) 262 buf = buf[k:] 263 ints[i] = min + delta 264 } 265 assertTrue(len(buf) == 0, "extra bytes after decoding varints") 266 return ints 267 } 268 269 func encodeMeanDeltas(ints []uint64, buf []byte) []byte { 270 var sum int64 271 for i := range ints { 272 sum += int64(ints[i]) 273 } 274 mean := sum / int64(len(ints)) 275 276 pos := 0 277 pos += binary.PutVarint(buf[pos:], mean) 278 279 for _, count := range ints { 280 delta := int64(count) - mean 281 pos += binary.PutVarint(buf[pos:], delta) 282 } 283 return buf[:pos] 284 } 285 286 func decodeMeanDeltas(buf []byte, ints []uint64) []uint64 { 287 mean, k := binary.Varint(buf) 288 buf = buf[k:] 289 for i := range ints { 290 delta, k := binary.Varint(buf) 291 buf = buf[k:] 292 ints[i] = uint64(mean + delta) 293 } 294 assertTrue(len(buf) == 0, "extra bytes after decoding varints") 295 return ints 296 }