github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/message/varint_test.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package message
    16  
    17  import (
    18  	"encoding/binary"
    19  	"fmt"
    20  	"math"
    21  	"math/rand"
    22  	"testing"
    23  
    24  	"github.com/stretchr/testify/assert"
    25  )
    26  
    27  var testRand = rand.New(rand.NewSource(1))
    28  
    29  func TestVarint(t *testing.T) {
    30  	t.Run("min delta varint", func(t *testing.T) {
    31  		testRoundTripVarints(t, minDeltaCodec{})
    32  	})
    33  	t.Run("mean delta varint", func(t *testing.T) {
    34  		testRoundTripVarints(t, meanDeltaCodec{})
    35  	})
    36  	t.Run("direct varint", func(t *testing.T) {
    37  		testRoundTripVarints(t, directCodec{})
    38  	})
    39  	t.Run("signed delta varint", func(t *testing.T) {
    40  		testRoundTripVarints(t, signedDeltaCodec{})
    41  	})
    42  }
    43  
    44  func BenchmarkVarint(b *testing.B) {
    45  	b.Run("signed delta varint", func(b *testing.B) {
    46  		benchmarkVarintCodec(b, signedDeltaCodec{})
    47  	})
    48  	b.Run("min delta varint", func(b *testing.B) {
    49  		benchmarkVarintCodec(b, minDeltaCodec{})
    50  	})
    51  	b.Run("mean delta varint", func(b *testing.B) {
    52  		benchmarkVarintCodec(b, meanDeltaCodec{})
    53  	})
    54  	b.Run("direct varint", func(tb *testing.B) {
    55  		benchmarkVarintCodec(b, directCodec{})
    56  	})
    57  
    58  }
    59  
    60  type codec interface {
    61  	encode(ints []uint64, buf []byte) []byte
    62  	decode(buf []byte, ints []uint64) []uint64
    63  	maxSize(n int) int
    64  }
    65  
    66  type minDeltaCodec struct{}
    67  
    68  func (d minDeltaCodec) encode(ints []uint64, buf []byte) []byte {
    69  	return encodeMinDeltas(ints, buf)
    70  }
    71  
    72  func (d minDeltaCodec) decode(buf []byte, ints []uint64) []uint64 {
    73  	return decodeMinDeltas(buf, ints)
    74  }
    75  
    76  func (d minDeltaCodec) maxSize(n int) int {
    77  	return maxEncodedSize(n)
    78  }
    79  
    80  type meanDeltaCodec struct{}
    81  
    82  func (d meanDeltaCodec) encode(ints []uint64, buf []byte) []byte {
    83  	return encodeMeanDeltas(ints, buf)
    84  }
    85  
    86  func (d meanDeltaCodec) decode(buf []byte, ints []uint64) []uint64 {
    87  	return decodeMeanDeltas(buf, ints)
    88  }
    89  
    90  func (d meanDeltaCodec) maxSize(n int) int {
    91  	return maxEncodedSize(n)
    92  }
    93  
    94  type directCodec struct{}
    95  
    96  func (d directCodec) encode(ints []uint64, buf []byte) []byte {
    97  	return encodeVarintDirect(ints, buf)
    98  }
    99  
   100  func (d directCodec) decode(buf []byte, ints []uint64) []uint64 {
   101  	return decodeVarintDirect(buf, ints)
   102  }
   103  
   104  func (d directCodec) maxSize(n int) int {
   105  	return n * binary.MaxVarintLen64
   106  }
   107  
   108  type signedDeltaCodec struct{}
   109  
   110  func (d signedDeltaCodec) encode(ints []uint64, buf []byte) []byte {
   111  	return endcodeSignedDeltas(ints, buf)
   112  }
   113  
   114  func (d signedDeltaCodec) decode(buf []byte, ints []uint64) []uint64 {
   115  	return decodeSignedDeltas(buf, ints)
   116  }
   117  
   118  func (d signedDeltaCodec) maxSize(n int) int {
   119  	return n * binary.MaxVarintLen64
   120  }
   121  
   122  func testRoundTripVarints(t *testing.T, c codec) {
   123  	for k := 0; k < 1000; k++ {
   124  		n := testRand.Intn(145) + 5
   125  
   126  		counts := make([]uint64, n)
   127  		sum := uint64(0)
   128  		for i := range counts {
   129  			c := testRand.Uint64() % math.MaxUint32
   130  			counts[i] = c
   131  			sum += c
   132  		}
   133  		assert.Equal(t, sum, sumSubtrees(counts))
   134  
   135  		// round trip the array
   136  		buf := make([]byte, c.maxSize(len(counts)))
   137  		buf = c.encode(counts, buf)
   138  		actual := c.decode(buf, make([]uint64, n))
   139  
   140  		assert.Equal(t, counts, actual)
   141  	}
   142  }
   143  
   144  func benchmarkVarintCodec(b *testing.B, c codec) {
   145  	k := 150 // branching factor
   146  
   147  	b.Run("level 1 subtree counts", func(b *testing.B) {
   148  		mean := uint64(k)
   149  		benchmarkVarint(b, mean, mean/4, k, c)
   150  	})
   151  	b.Run("level 2 subtree counts", func(b *testing.B) {
   152  		mean := uint64(k * k)
   153  		benchmarkVarint(b, mean, mean/4, k, c)
   154  	})
   155  	b.Run("level 3 subtree counts", func(b *testing.B) {
   156  		mean := uint64(k * k * k)
   157  		benchmarkVarint(b, mean, mean/4, k, c)
   158  	})
   159  	b.Run("level 4 subtree counts", func(b *testing.B) {
   160  		mean := uint64(k * k * k * k)
   161  		benchmarkVarint(b, mean, mean/4, k, c)
   162  	})
   163  }
   164  
   165  func benchmarkVarint(b *testing.B, mean, std uint64, k int, c codec) {
   166  	const n = 1000
   167  	ints, bufs := makeBenchmarkData(mean, std, k, n, c)
   168  
   169  	name := fmt.Sprintf("benchmark encode (mean: %d std: %d)", mean, std)
   170  	b.Run(name, func(b *testing.B) {
   171  		buf := make([]byte, c.maxSize(k))
   172  		for i := 0; i < b.N; i++ {
   173  			_ = c.encode(ints[i%n][:], buf)
   174  		}
   175  	})
   176  	name = fmt.Sprintf("benchmark decode (mean size: %f)", meanSize(bufs))
   177  	b.Run(name, func(b *testing.B) {
   178  		ints := make([]uint64, k)
   179  		for i := 0; i < b.N; i++ {
   180  			_ = c.decode(bufs[i%n], ints)
   181  		}
   182  	})
   183  }
   184  
   185  func makeBenchmarkData(mean, std uint64, k, n int, c codec) (ints [][]uint64, bufs [][]byte) {
   186  	ints = make([][]uint64, n)
   187  	bufs = make([][]byte, n)
   188  
   189  	for i := range ints {
   190  		ints[i] = make([]uint64, k)
   191  		for j := range ints[i] {
   192  			ints[i][j] = gaussian(float64(mean), float64(std))
   193  		}
   194  	}
   195  	for i := range bufs {
   196  		bufs[i] = make([]byte, c.maxSize(k))
   197  		bufs[i] = c.encode(ints[i], bufs[i])
   198  	}
   199  	return
   200  }
   201  
   202  func gaussian(mean, std float64) uint64 {
   203  	return uint64(testRand.NormFloat64()*std + mean)
   204  }
   205  
   206  func meanSize(encoded [][]byte) float64 {
   207  	var sumSz int
   208  	for i := range encoded {
   209  		sumSz += len(encoded[i])
   210  	}
   211  	return float64(sumSz) / float64(len(encoded))
   212  }
   213  
   214  func encodeVarintDirect(ints []uint64, buf []byte) []byte {
   215  	pos := 0
   216  	for i := range ints {
   217  		pos += binary.PutUvarint(buf[pos:], ints[i])
   218  	}
   219  	return buf[:pos]
   220  }
   221  
   222  func decodeVarintDirect(buf []byte, ints []uint64) []uint64 {
   223  	for i := range ints {
   224  		var n int
   225  		ints[i], n = binary.Uvarint(buf)
   226  		buf = buf[n:]
   227  	}
   228  	assertTrue(len(buf) == 0, "extra bytes after decoding varints")
   229  	return ints
   230  }
   231  
   232  // encodeMinDeltas encodes an unsorted array |ints|.
   233  // The encoding format attempts to minimize encoded size by
   234  // first finding and encoding the minimum value of |ints|
   235  // and then encoding the difference between each value and
   236  // that minimum.
   237  func encodeMinDeltas(ints []uint64, buf []byte) []byte {
   238  	min := uint64(math.MaxUint64)
   239  	for i := range ints {
   240  		if min > ints[i] {
   241  			min = ints[i]
   242  		}
   243  	}
   244  
   245  	pos := 0
   246  	pos += binary.PutUvarint(buf[pos:], min)
   247  
   248  	for _, count := range ints {
   249  		delta := count - min
   250  		pos += binary.PutUvarint(buf[pos:], delta)
   251  	}
   252  	return buf[:pos]
   253  }
   254  
   255  // decodeMinDeltas decodes an array of ints that were
   256  // previously encoded with encodeMinDeltas.
   257  func decodeMinDeltas(buf []byte, ints []uint64) []uint64 {
   258  	min, k := binary.Uvarint(buf)
   259  	buf = buf[k:]
   260  	for i := range ints {
   261  		delta, k := binary.Uvarint(buf)
   262  		buf = buf[k:]
   263  		ints[i] = min + delta
   264  	}
   265  	assertTrue(len(buf) == 0, "extra bytes after decoding varints")
   266  	return ints
   267  }
   268  
   269  func encodeMeanDeltas(ints []uint64, buf []byte) []byte {
   270  	var sum int64
   271  	for i := range ints {
   272  		sum += int64(ints[i])
   273  	}
   274  	mean := sum / int64(len(ints))
   275  
   276  	pos := 0
   277  	pos += binary.PutVarint(buf[pos:], mean)
   278  
   279  	for _, count := range ints {
   280  		delta := int64(count) - mean
   281  		pos += binary.PutVarint(buf[pos:], delta)
   282  	}
   283  	return buf[:pos]
   284  }
   285  
   286  func decodeMeanDeltas(buf []byte, ints []uint64) []uint64 {
   287  	mean, k := binary.Varint(buf)
   288  	buf = buf[k:]
   289  	for i := range ints {
   290  		delta, k := binary.Varint(buf)
   291  		buf = buf[k:]
   292  		ints[i] = uint64(mean + delta)
   293  	}
   294  	assertTrue(len(buf) == 0, "extra bytes after decoding varints")
   295  	return ints
   296  }