github.com/apache/arrow/go/v14@v14.0.2/internal/hashing/hashing_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package hashing 18 19 import ( 20 "math/rand" 21 "testing" 22 23 "github.com/stretchr/testify/assert" 24 ) 25 26 func MakeDistinctIntegers(nvals int) map[int]bool { 27 r := rand.New(rand.NewSource(42)) 28 values := make(map[int]bool) 29 for len(values) < nvals { 30 values[r.Int()] = true 31 } 32 return values 33 } 34 35 func MakeSequentialIntegers(nvals int) map[int]bool { 36 values := make(map[int]bool) 37 for i := 0; i < nvals; i++ { 38 values[i] = true 39 } 40 return values 41 } 42 43 func MakeDistinctStrings(nvals int) map[string]bool { 44 values := make(map[string]bool) 45 46 r := rand.New(rand.NewSource(42)) 47 48 max := 'z' 49 min := '0' 50 for len(values) < nvals { 51 data := make([]byte, r.Intn(24)) 52 for idx := range data { 53 data[idx] = byte(r.Intn(int(max-min+1)) + int(min)) 54 } 55 values[string(data)] = true 56 } 57 return values 58 } 59 60 func TestHashingQualityInt(t *testing.T) { 61 const nvalues = 10000 62 63 tests := []struct { 64 name string 65 values map[int]bool 66 quality float64 67 }{ 68 {"distinct", MakeDistinctIntegers(nvalues), 0.96}, 69 {"sequential", MakeSequentialIntegers(nvalues), 0.96}, 70 } 71 72 for _, tt := range tests { 73 t.Run(tt.name, func(t *testing.T) { 74 hashes := make(map[uint64]bool) 75 for k := range tt.values { 76 hashes[hashInt(uint64(k), 0)] = true 77 hashes[hashInt(uint64(k), 1)] = true 78 } 79 assert.GreaterOrEqual(t, float64(len(hashes)), tt.quality*float64(2*len(tt.values))) 80 }) 81 } 82 } 83 84 func TestHashingBoundsStrings(t *testing.T) { 85 sizes := []int{1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17, 18, 19, 20, 21} 86 for _, s := range sizes { 87 str := make([]byte, s) 88 for idx := range str { 89 str[idx] = uint8(idx) 90 } 91 92 h := Hash(str, 1) 93 diff := 0 94 for i := 0; i < 120; i++ { 95 str[len(str)-1] = uint8(i) 96 if Hash(str, 1) != h { 97 diff++ 98 } 99 } 100 assert.GreaterOrEqual(t, diff, 118) 101 } 102 } 103 104 func TestHashingQualityString(t *testing.T) { 105 const nvalues = 10000 106 values := MakeDistinctStrings(nvalues) 107 108 hashes := make(map[uint64]bool) 109 for k := range values { 110 hashes[hashString(k, 0)] = true 111 hashes[hashString(k, 1)] = true 112 } 113 assert.GreaterOrEqual(t, float64(len(hashes)), 0.96*float64(2*len(values))) 114 }