github.com/apache/arrow/go/v14@v14.0.2/internal/hashing/hash_funcs.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package hashing
    18  
    19  import (
    20  	"math/bits"
    21  	"unsafe"
    22  
    23  	"github.com/zeebo/xxh3"
    24  )
    25  
    26  func hashInt(val uint64, alg uint64) uint64 {
    27  	// Two of xxhash's prime multipliers (which are chosen for their
    28  	// bit dispersion properties)
    29  	var multipliers = [2]uint64{11400714785074694791, 14029467366897019727}
    30  	// Multiplying by the prime number mixes the low bits into the high bits,
    31  	// then byte-swapping (which is a single CPU instruction) allows the
    32  	// combined high and low bits to participate in the initial hash table index.
    33  	return bits.ReverseBytes64(multipliers[alg] * val)
    34  }
    35  
    36  func hashFloat32(val float32, alg uint64) uint64 {
    37  	// grab the raw byte pattern of the
    38  	bt := *(*[4]byte)(unsafe.Pointer(&val))
    39  	x := uint64(*(*uint32)(unsafe.Pointer(&bt[0])))
    40  	hx := hashInt(x, alg)
    41  	hy := hashInt(x, alg^1)
    42  	return 4 ^ hx ^ hy
    43  }
    44  
    45  func hashFloat64(val float64, alg uint64) uint64 {
    46  	bt := *(*[8]byte)(unsafe.Pointer(&val))
    47  	hx := hashInt(uint64(*(*uint32)(unsafe.Pointer(&bt[4]))), alg)
    48  	hy := hashInt(uint64(*(*uint32)(unsafe.Pointer(&bt[0]))), alg^1)
    49  	return 8 ^ hx ^ hy
    50  }
    51  
    52  // prime constants used for slightly increasing the hash quality further
    53  var exprimes = [2]uint64{1609587929392839161, 9650029242287828579}
    54  
    55  // for smaller amounts of bytes this is faster than even calling into
    56  // xxh3 to do the Hash, so we specialize in order to get the benefits
    57  // of that performance.
    58  func Hash(b []byte, alg uint64) uint64 {
    59  	n := uint32(len(b))
    60  	if n <= 16 {
    61  		switch {
    62  		case n > 8:
    63  			// 8 < length <= 16
    64  			// apply same principle as above, but as two 64-bit ints
    65  			x := *(*uint64)(unsafe.Pointer(&b[n-8]))
    66  			y := *(*uint64)(unsafe.Pointer(&b[0]))
    67  			hx := hashInt(x, alg)
    68  			hy := hashInt(y, alg^1)
    69  			return uint64(n) ^ hx ^ hy
    70  		case n >= 4:
    71  			// 4 < length <= 8
    72  			// we can read the bytes as two overlapping 32-bit ints, apply different
    73  			// hash functions to each in parallel
    74  			// then xor the results
    75  			x := *(*uint32)(unsafe.Pointer(&b[n-4]))
    76  			y := *(*uint32)(unsafe.Pointer(&b[0]))
    77  			hx := hashInt(uint64(x), alg)
    78  			hy := hashInt(uint64(y), alg^1)
    79  			return uint64(n) ^ hx ^ hy
    80  		case n > 0:
    81  			x := uint32((n << 24) ^ (uint32(b[0]) << 16) ^ (uint32(b[n/2]) << 8) ^ uint32(b[n-1]))
    82  			return hashInt(uint64(x), alg)
    83  		case n == 0:
    84  			return 1
    85  		}
    86  	}
    87  
    88  	// increase differentiation enough to improve hash quality
    89  	return xxh3.Hash(b) + exprimes[alg]
    90  }