github.com/apache/arrow/go/v14@v14.0.1/internal/hashing/hash_funcs.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package hashing 18 19 import ( 20 "math/bits" 21 "unsafe" 22 23 "github.com/zeebo/xxh3" 24 ) 25 26 func hashInt(val uint64, alg uint64) uint64 { 27 // Two of xxhash's prime multipliers (which are chosen for their 28 // bit dispersion properties) 29 var multipliers = [2]uint64{11400714785074694791, 14029467366897019727} 30 // Multiplying by the prime number mixes the low bits into the high bits, 31 // then byte-swapping (which is a single CPU instruction) allows the 32 // combined high and low bits to participate in the initial hash table index. 33 return bits.ReverseBytes64(multipliers[alg] * val) 34 } 35 36 func hashFloat32(val float32, alg uint64) uint64 { 37 // grab the raw byte pattern of the 38 bt := *(*[4]byte)(unsafe.Pointer(&val)) 39 x := uint64(*(*uint32)(unsafe.Pointer(&bt[0]))) 40 hx := hashInt(x, alg) 41 hy := hashInt(x, alg^1) 42 return 4 ^ hx ^ hy 43 } 44 45 func hashFloat64(val float64, alg uint64) uint64 { 46 bt := *(*[8]byte)(unsafe.Pointer(&val)) 47 hx := hashInt(uint64(*(*uint32)(unsafe.Pointer(&bt[4]))), alg) 48 hy := hashInt(uint64(*(*uint32)(unsafe.Pointer(&bt[0]))), alg^1) 49 return 8 ^ hx ^ hy 50 } 51 52 // prime constants used for slightly increasing the hash quality further 53 var exprimes = [2]uint64{1609587929392839161, 9650029242287828579} 54 55 // for smaller amounts of bytes this is faster than even calling into 56 // xxh3 to do the Hash, so we specialize in order to get the benefits 57 // of that performance. 58 func Hash(b []byte, alg uint64) uint64 { 59 n := uint32(len(b)) 60 if n <= 16 { 61 switch { 62 case n > 8: 63 // 8 < length <= 16 64 // apply same principle as above, but as two 64-bit ints 65 x := *(*uint64)(unsafe.Pointer(&b[n-8])) 66 y := *(*uint64)(unsafe.Pointer(&b[0])) 67 hx := hashInt(x, alg) 68 hy := hashInt(y, alg^1) 69 return uint64(n) ^ hx ^ hy 70 case n >= 4: 71 // 4 < length <= 8 72 // we can read the bytes as two overlapping 32-bit ints, apply different 73 // hash functions to each in parallel 74 // then xor the results 75 x := *(*uint32)(unsafe.Pointer(&b[n-4])) 76 y := *(*uint32)(unsafe.Pointer(&b[0])) 77 hx := hashInt(uint64(x), alg) 78 hy := hashInt(uint64(y), alg^1) 79 return uint64(n) ^ hx ^ hy 80 case n > 0: 81 x := uint32((n << 24) ^ (uint32(b[0]) << 16) ^ (uint32(b[n/2]) << 8) ^ uint32(b[n-1])) 82 return hashInt(uint64(x), alg) 83 case n == 0: 84 return 1 85 } 86 } 87 88 // increase differentiation enough to improve hash quality 89 return xxh3.Hash(b) + exprimes[alg] 90 }