github.com/matrixorigin/matrixone@v1.2.0/pkg/container/hashtable/hash.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build amd64 || arm64 16 17 package hashtable 18 19 import ( 20 "math/bits" 21 "math/rand" 22 "unsafe" 23 ) 24 25 var ( 26 Int64BatchHash = wyhashInt64Batch 27 Int64HashWithFixedSeed = wyhash64WithFixedSeed 28 BytesBatchGenHashStates = wyhashBytesBatch 29 BytesBatchGenHashStatesWithSeed = wyhashBytesBatchWithSeed 30 Int192BatchGenHashStates = wyhashInt192Batch 31 Int256BatchGenHashStates = wyhashInt256Batch 32 Int320BatchGenHashStates = wyhashInt320Batch 33 ) 34 35 // Hashing algorithm inspired by 36 // wyhash: https://github.com/wangyi-fudan/wyhash 37 38 var randseed uint64 39 var hashkey [4]uint64 40 41 func init() { 42 randseed = rand.Uint64() 43 hashkey[0] = rand.Uint64() 44 hashkey[1] = rand.Uint64() 45 hashkey[2] = rand.Uint64() 46 hashkey[3] = rand.Uint64() 47 } 48 49 const ( 50 m1 = 0xa0761d6478bd642f 51 m2 = 0xe7037ed1a0b428db 52 m3 = 0x8ebc6af09c88c6e3 53 m4 = 0x589965cc75374cc3 54 m5 = 0x1d8e4e27c47d124f 55 ) 56 57 func wyhash(p unsafe.Pointer, seed, s uint64) uint64 { 58 var a, b uint64 59 seed ^= hashkey[0] ^ m1 60 switch { 61 case s == 0: 62 return seed 63 case s < 4: 64 a = uint64(*(*byte)(p)) 65 a |= uint64(*(*byte)(unsafe.Add(p, s>>1))) << 8 66 a |= uint64(*(*byte)(unsafe.Add(p, s-1))) << 16 67 case s == 4: 68 a = r4(p) 69 b = a 70 case s < 8: 71 a = r4(p) 72 b = r4(unsafe.Add(p, s-4)) 73 case s == 8: 74 a = r8(p) 75 b = a 76 case s <= 16: 77 a = r8(p) 78 b = r8(unsafe.Add(p, s-8)) 79 default: 80 l := s 81 if l > 48 { 82 seed1 := seed 83 seed2 := seed 84 for ; l > 48; l -= 48 { 85 seed = mix(r8(p)^m2, r8(unsafe.Add(p, 8))^seed) 86 seed1 = mix(r8(unsafe.Add(p, 16))^m3, r8(unsafe.Add(p, 24))^seed1) 87 seed2 = mix(r8(unsafe.Add(p, 32))^m4, r8(unsafe.Add(p, 40))^seed2) 88 p = unsafe.Add(p, 48) 89 } 90 seed ^= seed1 ^ seed2 91 } 92 for ; l > 16; l -= 16 { 93 seed = mix(r8(p)^m2, r8(unsafe.Add(p, 8))^seed) 94 p = unsafe.Add(p, 16) 95 } 96 a = r8(unsafe.Add(p, l-16)) 97 b = r8(unsafe.Add(p, l-8)) 98 } 99 100 return mix(m5^s, mix(a^m2, b^seed)) 101 } 102 103 func wyhash64WithFixedSeed(x uint64) uint64 { 104 return mix(m5^8, mix(x^m2, x^m3^m4^m1)) 105 } 106 107 func wyhash64(x, seed uint64) uint64 { 108 return mix(m5^8, mix(x^m2, x^seed^hashkey[0]^m1)) 109 } 110 111 func mix(a, b uint64) uint64 { 112 hi, lo := bits.Mul64(uint64(a), uint64(b)) 113 return uint64(hi ^ lo) 114 } 115 116 func r4(p unsafe.Pointer) uint64 { 117 return uint64(*(*uint32)(p)) 118 } 119 120 func r8(p unsafe.Pointer) uint64 { 121 return *(*uint64)(p) 122 } 123 124 func wyhashInt64Batch(data unsafe.Pointer, hashes *uint64, length int) { 125 dataSlice := unsafe.Slice((*uint64)(data), length) 126 hashSlice := unsafe.Slice(hashes, length) 127 128 for i := 0; i < length; i++ { 129 hashSlice[i] = wyhash64(dataSlice[i], randseed) 130 } 131 } 132 133 func wyhashBytesBatch(data *[]byte, states *[3]uint64, length int) { 134 dataSlice := unsafe.Slice((*[]byte)(data), length) 135 hashSlice := unsafe.Slice((*[3]uint64)(states), length) 136 for i := 0; i < length; i++ { 137 hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, uint64(len(dataSlice[i]))) 138 hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, uint64(len(dataSlice[i]))) 139 hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, uint64(len(dataSlice[i]))) 140 } 141 } 142 143 func wyhashBytesBatchWithSeed(data *[]byte, states *[3]uint64, length int, seed uint64) { 144 dataSlice := unsafe.Slice((*[]byte)(data), length) 145 hashSlice := unsafe.Slice((*[3]uint64)(states), length) 146 for i := 0; i < length; i++ { 147 hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), seed, uint64(len(dataSlice[i]))) 148 hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), seed<<32, uint64(len(dataSlice[i]))) 149 hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), seed>>32, uint64(len(dataSlice[i]))) 150 } 151 } 152 153 func wyhashInt192Batch(data *[3]uint64, states *[3]uint64, length int) { 154 dataSlice := unsafe.Slice((*[3]uint64)(data), length) 155 hashSlice := unsafe.Slice((*[3]uint64)(states), length) 156 for i := 0; i < length; i++ { 157 hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 24) 158 hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 24) 159 hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 24) 160 } 161 } 162 163 func wyhashInt256Batch(data *[4]uint64, states *[3]uint64, length int) { 164 dataSlice := unsafe.Slice((*[4]uint64)(data), length) 165 hashSlice := unsafe.Slice((*[3]uint64)(states), length) 166 for i := 0; i < length; i++ { 167 hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 32) 168 hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 32) 169 hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 32) 170 } 171 } 172 173 func wyhashInt320Batch(data *[5]uint64, states *[3]uint64, length int) { 174 dataSlice := unsafe.Slice((*[5]uint64)(data), length) 175 hashSlice := unsafe.Slice((*[3]uint64)(states), length) 176 for i := 0; i < length; i++ { 177 hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 40) 178 hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 40) 179 hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 40) 180 } 181 }