github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/x/hash/jump/jump_test.go

github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/x/hash/jump/jump_test.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package jump
    22  
    23  import (
    24  	"math"
    25  	"testing"
    26  
    27  	"github.com/stretchr/testify/assert"
    28  )
    29  
    30  // TestHashDistribution tests that Hash is distributing keys evenly among buckets
    31  func TestHashDistribution(t *testing.T) {
    32  	// 95th percentile of Chi-Squared Distribution with 9, 99, 999, 9999 degrees of freedom
    33  	chiSquared95Percentiles := []float64{16.9190, 123.2252, 1073.6427, 10232.7373}
    34  
    35  	numBuckets := []int64{10, 100, 1000, 10000}
    36  	for i, n := range numBuckets {
    37  		totalKeys := uint64(n) * 1000
    38  		stepSize := math.MaxUint64 / totalKeys
    39  		buckets := make(map[int64]int, n)
    40  
    41  		var hash, j uint64
    42  		for ; j < totalKeys; j++ {
    43  			idx := Hash(hash, n)
    44  			buckets[idx]++
    45  			hash += stepSize
    46  		}
    47  
    48  		expected := float64(totalKeys) / float64(n)
    49  		var testStatistic float64
    50  		for _, observed := range buckets {
    51  			diff := float64(observed) - expected
    52  			testStatistic += diff * diff / expected
    53  		}
    54  
    55  		// To test that Hash is evenly distributing keys among the buckets we'll perform Pearson's
    56  		// chi-squared test with a significance level of 95%.
    57  		assert.True(t, testStatistic < chiSquared95Percentiles[i])
    58  	}
    59  }
    60  
    61  // TestHashMoved tests that Hash only redistributes approximately 1/(n+1) keys when going from
    62  // n to n+1 buckets
    63  func TestHashMoved(t *testing.T) {
    64  	numBuckets := []int64{10, 100, 1000}
    65  
    66  	for _, n := range numBuckets {
    67  		totalKeys := uint64(n) * 1000
    68  		stepSize := math.MaxUint64 / totalKeys
    69  		oldBuckets := make(map[uint64]int64, totalKeys)
    70  
    71  		var hash, j uint64
    72  		for ; j < totalKeys; j++ {
    73  			idx := Hash(hash, n)
    74  			oldBuckets[hash] = idx
    75  			hash += stepSize
    76  		}
    77  
    78  		newBuckets := make(map[uint64]int64, totalKeys)
    79  
    80  		hash, j = 0, 0
    81  		for ; j < totalKeys; j++ {
    82  			idx := Hash(hash, n+1)
    83  			newBuckets[hash] = idx
    84  			hash += stepSize
    85  		}
    86  
    87  		var numMoved int
    88  		hash = 0
    89  		for ; j < totalKeys; j++ {
    90  			if oldBuckets[hash] != newBuckets[hash] {
    91  				numMoved++
    92  			}
    93  			hash += stepSize
    94  		}
    95  
    96  		movedPercent := float64(numMoved) / float64(totalKeys)
    97  		idealPercent := 1.0 / float64(n+1)
    98  
    99  		// To test that Hash is redistributing approximately the ideal number of keys we require that
   100  		// the percentage of moved key is less than 1.5 times the ideal percentage of moved keys
   101  		assert.True(t, movedPercent < 1.5*idealPercent)
   102  	}
   103  }
   104  
   105  func TestHashBadInput(t *testing.T) {
   106  	expected := int64(-1)
   107  	actual := Hash(21, -1)
   108  	assert.Equal(t, expected, actual)
   109  }
   110  
   111  func BenchmarkHash(b *testing.B) {
   112  	for i := 0; i < b.N; i++ {
   113  		Hash(42, 132)
   114  	}
   115  }