github.com/matrixorigin/matrixone@v1.2.0/pkg/container/hashtable/hash.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:build amd64 || arm64
    16  
    17  package hashtable
    18  
    19  import (
    20  	"math/bits"
    21  	"math/rand"
    22  	"unsafe"
    23  )
    24  
// Exported entry points for the hash routines in this file. Each variable is
// bound to the wyhash-based implementation defined below:
//
//   - Int64BatchHash: one hash per 64-bit key, seeded with randseed.
//   - Int64HashWithFixedSeed: hash of a single 64-bit value that uses only
//     compile-time constants as seed.
//   - BytesBatchGenHashStates: three hash states per []byte key, seeded with
//     randseed.
//   - BytesBatchGenHashStatesWithSeed: as above, with a caller-supplied seed.
//   - Int192BatchGenHashStates / Int256BatchGenHashStates /
//     Int320BatchGenHashStates: three hash states per fixed-width key of
//     3, 4 or 5 uint64 words, seeded with randseed.
var (
	Int64BatchHash                  = wyhashInt64Batch
	Int64HashWithFixedSeed          = wyhash64WithFixedSeed
	BytesBatchGenHashStates         = wyhashBytesBatch
	BytesBatchGenHashStatesWithSeed = wyhashBytesBatchWithSeed
	Int192BatchGenHashStates        = wyhashInt192Batch
	Int256BatchGenHashStates        = wyhashInt256Batch
	Int320BatchGenHashStates        = wyhashInt320Batch
)
    34  
    35  // Hashing algorithm inspired by
    36  // wyhash: https://github.com/wangyi-fudan/wyhash
    37  
// randseed is the seed used by the batch hash functions that do not take an
// explicit seed. It is assigned once in init.
var randseed uint64

// hashkey holds random key material assigned once in init. Only hashkey[0]
// is read by the hash functions in this file.
var hashkey [4]uint64
    40  
    41  func init() {
    42  	randseed = rand.Uint64()
    43  	hashkey[0] = rand.Uint64()
    44  	hashkey[1] = rand.Uint64()
    45  	hashkey[2] = rand.Uint64()
    46  	hashkey[3] = rand.Uint64()
    47  }
    48  
// m1..m5 are the 64-bit mixing constants used by wyhash, wyhash64 and
// wyhash64WithFixedSeed. m1 is folded into the seed, m2..m4 are XORed with
// input words before mixing, and m5 is XORed with the input length in the
// final mix.
const (
	m1 = 0xa0761d6478bd642f
	m2 = 0xe7037ed1a0b428db
	m3 = 0x8ebc6af09c88c6e3
	m4 = 0x589965cc75374cc3
	m5 = 0x1d8e4e27c47d124f
)
    56  
    57  func wyhash(p unsafe.Pointer, seed, s uint64) uint64 {
    58  	var a, b uint64
    59  	seed ^= hashkey[0] ^ m1
    60  	switch {
    61  	case s == 0:
    62  		return seed
    63  	case s < 4:
    64  		a = uint64(*(*byte)(p))
    65  		a |= uint64(*(*byte)(unsafe.Add(p, s>>1))) << 8
    66  		a |= uint64(*(*byte)(unsafe.Add(p, s-1))) << 16
    67  	case s == 4:
    68  		a = r4(p)
    69  		b = a
    70  	case s < 8:
    71  		a = r4(p)
    72  		b = r4(unsafe.Add(p, s-4))
    73  	case s == 8:
    74  		a = r8(p)
    75  		b = a
    76  	case s <= 16:
    77  		a = r8(p)
    78  		b = r8(unsafe.Add(p, s-8))
    79  	default:
    80  		l := s
    81  		if l > 48 {
    82  			seed1 := seed
    83  			seed2 := seed
    84  			for ; l > 48; l -= 48 {
    85  				seed = mix(r8(p)^m2, r8(unsafe.Add(p, 8))^seed)
    86  				seed1 = mix(r8(unsafe.Add(p, 16))^m3, r8(unsafe.Add(p, 24))^seed1)
    87  				seed2 = mix(r8(unsafe.Add(p, 32))^m4, r8(unsafe.Add(p, 40))^seed2)
    88  				p = unsafe.Add(p, 48)
    89  			}
    90  			seed ^= seed1 ^ seed2
    91  		}
    92  		for ; l > 16; l -= 16 {
    93  			seed = mix(r8(p)^m2, r8(unsafe.Add(p, 8))^seed)
    94  			p = unsafe.Add(p, 16)
    95  		}
    96  		a = r8(unsafe.Add(p, l-16))
    97  		b = r8(unsafe.Add(p, l-8))
    98  	}
    99  
   100  	return mix(m5^s, mix(a^m2, b^seed))
   101  }
   102  
   103  func wyhash64WithFixedSeed(x uint64) uint64 {
   104  	return mix(m5^8, mix(x^m2, x^m3^m4^m1))
   105  }
   106  
   107  func wyhash64(x, seed uint64) uint64 {
   108  	return mix(m5^8, mix(x^m2, x^seed^hashkey[0]^m1))
   109  }
   110  
   111  func mix(a, b uint64) uint64 {
   112  	hi, lo := bits.Mul64(uint64(a), uint64(b))
   113  	return uint64(hi ^ lo)
   114  }
   115  
   116  func r4(p unsafe.Pointer) uint64 {
   117  	return uint64(*(*uint32)(p))
   118  }
   119  
   120  func r8(p unsafe.Pointer) uint64 {
   121  	return *(*uint64)(p)
   122  }
   123  
   124  func wyhashInt64Batch(data unsafe.Pointer, hashes *uint64, length int) {
   125  	dataSlice := unsafe.Slice((*uint64)(data), length)
   126  	hashSlice := unsafe.Slice(hashes, length)
   127  
   128  	for i := 0; i < length; i++ {
   129  		hashSlice[i] = wyhash64(dataSlice[i], randseed)
   130  	}
   131  }
   132  
   133  func wyhashBytesBatch(data *[]byte, states *[3]uint64, length int) {
   134  	dataSlice := unsafe.Slice((*[]byte)(data), length)
   135  	hashSlice := unsafe.Slice((*[3]uint64)(states), length)
   136  	for i := 0; i < length; i++ {
   137  		hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, uint64(len(dataSlice[i])))
   138  		hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, uint64(len(dataSlice[i])))
   139  		hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, uint64(len(dataSlice[i])))
   140  	}
   141  }
   142  
   143  func wyhashBytesBatchWithSeed(data *[]byte, states *[3]uint64, length int, seed uint64) {
   144  	dataSlice := unsafe.Slice((*[]byte)(data), length)
   145  	hashSlice := unsafe.Slice((*[3]uint64)(states), length)
   146  	for i := 0; i < length; i++ {
   147  		hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), seed, uint64(len(dataSlice[i])))
   148  		hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), seed<<32, uint64(len(dataSlice[i])))
   149  		hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), seed>>32, uint64(len(dataSlice[i])))
   150  	}
   151  }
   152  
   153  func wyhashInt192Batch(data *[3]uint64, states *[3]uint64, length int) {
   154  	dataSlice := unsafe.Slice((*[3]uint64)(data), length)
   155  	hashSlice := unsafe.Slice((*[3]uint64)(states), length)
   156  	for i := 0; i < length; i++ {
   157  		hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 24)
   158  		hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 24)
   159  		hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 24)
   160  	}
   161  }
   162  
   163  func wyhashInt256Batch(data *[4]uint64, states *[3]uint64, length int) {
   164  	dataSlice := unsafe.Slice((*[4]uint64)(data), length)
   165  	hashSlice := unsafe.Slice((*[3]uint64)(states), length)
   166  	for i := 0; i < length; i++ {
   167  		hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 32)
   168  		hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 32)
   169  		hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 32)
   170  	}
   171  }
   172  
   173  func wyhashInt320Batch(data *[5]uint64, states *[3]uint64, length int) {
   174  	dataSlice := unsafe.Slice((*[5]uint64)(data), length)
   175  	hashSlice := unsafe.Slice((*[3]uint64)(states), length)
   176  	for i := 0; i < length; i++ {
   177  		hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 40)
   178  		hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 40)
   179  		hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 40)
   180  	}
   181  }