github.com/matrixorigin/matrixone@v0.7.0/pkg/container/hashtable/hash.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build amd64 || arm64 16 17 package hashtable 18 19 import ( 20 "math/bits" 21 "math/rand" 22 "unsafe" 23 ) 24 25 var ( 26 Int64BatchHash = wyhashInt64Batch 27 Int64CellBatchHash = wyhashInt64CellBatch 28 29 BytesBatchGenHashStates = wyhashBytesBatch 30 Int192BatchGenHashStates = wyhashInt192Batch 31 Int256BatchGenHashStates = wyhashInt256Batch 32 Int320BatchGenHashStates = wyhashInt320Batch 33 ) 34 35 // Hashing algorithm inspired by 36 // wyhash: https://github.com/wangyi-fudan/wyhash 37 38 var randseed uint64 39 var hashkey [4]uint64 40 41 func init() { 42 randseed = rand.Uint64() 43 hashkey[0] = rand.Uint64() 44 hashkey[1] = rand.Uint64() 45 hashkey[2] = rand.Uint64() 46 hashkey[3] = rand.Uint64() 47 } 48 49 const ( 50 m1 = 0xa0761d6478bd642f 51 m2 = 0xe7037ed1a0b428db 52 m3 = 0x8ebc6af09c88c6e3 53 m4 = 0x589965cc75374cc3 54 m5 = 0x1d8e4e27c47d124f 55 ) 56 57 func wyhash(p unsafe.Pointer, seed, s uint64) uint64 { 58 var a, b uint64 59 seed ^= hashkey[0] ^ m1 60 switch { 61 case s == 0: 62 return seed 63 case s < 4: 64 a = uint64(*(*byte)(p)) 65 a |= uint64(*(*byte)(unsafe.Add(p, s>>1))) << 8 66 a |= uint64(*(*byte)(unsafe.Add(p, s-1))) << 16 67 case s == 4: 68 a = r4(p) 69 b = a 70 case s < 8: 71 a = r4(p) 72 b = r4(unsafe.Add(p, s-4)) 73 case s == 8: 74 a = r8(p) 75 b = a 76 case s <= 16: 77 a = r8(p) 78 b = r8(unsafe.Add(p, s-8)) 79 default: 80 l := s 81 if l > 48 { 82 seed1 := seed 83 seed2 := seed 84 for ; l > 48; l -= 48 { 85 seed = mix(r8(p)^m2, r8(unsafe.Add(p, 8))^seed) 86 seed1 = mix(r8(unsafe.Add(p, 16))^m3, r8(unsafe.Add(p, 24))^seed1) 87 seed2 = mix(r8(unsafe.Add(p, 32))^m4, r8(unsafe.Add(p, 40))^seed2) 88 p = unsafe.Add(p, 48) 89 } 90 seed ^= seed1 ^ seed2 91 } 92 for ; l > 16; l -= 16 { 93 seed = mix(r8(p)^m2, r8(unsafe.Add(p, 8))^seed) 94 p = unsafe.Add(p, 16) 95 } 96 a = r8(unsafe.Add(p, l-16)) 97 b = r8(unsafe.Add(p, l-8)) 98 } 99 100 return mix(m5^s, mix(a^m2, b^seed)) 101 } 102 103 func wyhash64(x, seed uint64) uint64 { 104 return mix(m5^8, mix(x^m2, x^seed^hashkey[0]^m1)) 105 } 106 107 func mix(a, b uint64) uint64 { 108 hi, lo := bits.Mul64(uint64(a), uint64(b)) 109 return uint64(hi ^ lo) 110 } 111 112 func r4(p unsafe.Pointer) uint64 { 113 return uint64(*(*uint32)(p)) 114 } 115 116 func r8(p unsafe.Pointer) uint64 { 117 return *(*uint64)(p) 118 } 119 120 func wyhashInt64Batch(data unsafe.Pointer, hashes *uint64, length int) { 121 dataSlice := unsafe.Slice((*uint64)(data), length) 122 hashSlice := unsafe.Slice(hashes, length) 123 124 for i := 0; i < length; i++ { 125 hashSlice[i] = wyhash64(dataSlice[i], randseed) 126 } 127 } 128 129 func wyhashInt64CellBatch(data unsafe.Pointer, hashes *uint64, length int) { 130 dataSlice := unsafe.Slice((*Int64HashMapCell)(data), length) 131 hashSlice := unsafe.Slice(hashes, length) 132 for i := 0; i < length; i++ { 133 hashSlice[i] = wyhash64(dataSlice[i].Key, randseed) 134 } 135 } 136 137 func wyhashBytesBatch(data *[]byte, states *[3]uint64, length int) { 138 dataSlice := unsafe.Slice((*[]byte)(data), length) 139 hashSlice := unsafe.Slice((*[3]uint64)(states), length) 140 for i := 0; i < length; i++ { 141 hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, uint64(len(dataSlice[i]))) 142 hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, uint64(len(dataSlice[i]))) 143 hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, uint64(len(dataSlice[i]))) 144 } 145 } 146 147 func wyhashInt192Batch(data *[3]uint64, states *[3]uint64, length int) { 148 dataSlice := unsafe.Slice((*[3]uint64)(data), length) 149 hashSlice := unsafe.Slice((*[3]uint64)(states), length) 150 for i := 0; i < length; i++ { 151 hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 24) 152 hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 24) 153 hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 24) 154 } 155 } 156 157 func wyhashInt256Batch(data *[4]uint64, states *[3]uint64, length int) { 158 dataSlice := unsafe.Slice((*[4]uint64)(data), length) 159 hashSlice := unsafe.Slice((*[3]uint64)(states), length) 160 for i := 0; i < length; i++ { 161 hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 32) 162 hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 32) 163 hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 32) 164 } 165 } 166 167 func wyhashInt320Batch(data *[5]uint64, states *[3]uint64, length int) { 168 dataSlice := unsafe.Slice((*[5]uint64)(data), length) 169 hashSlice := unsafe.Slice((*[3]uint64)(states), length) 170 for i := 0; i < length; i++ { 171 hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 40) 172 hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 40) 173 hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 40) 174 } 175 }