github.com/matrixorigin/matrixone@v0.7.0/pkg/container/hashtable/hash.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:build amd64 || arm64
    16  
    17  package hashtable
    18  
    19  import (
    20  	"math/bits"
    21  	"math/rand"
    22  	"unsafe"
    23  )
    24  
    25  var (
    26  	Int64BatchHash     = wyhashInt64Batch
    27  	Int64CellBatchHash = wyhashInt64CellBatch
    28  
    29  	BytesBatchGenHashStates  = wyhashBytesBatch
    30  	Int192BatchGenHashStates = wyhashInt192Batch
    31  	Int256BatchGenHashStates = wyhashInt256Batch
    32  	Int320BatchGenHashStates = wyhashInt320Batch
    33  )
    34  
    35  // Hashing algorithm inspired by
    36  // wyhash: https://github.com/wangyi-fudan/wyhash
    37  
    38  var randseed uint64
    39  var hashkey [4]uint64
    40  
    41  func init() {
    42  	randseed = rand.Uint64()
    43  	hashkey[0] = rand.Uint64()
    44  	hashkey[1] = rand.Uint64()
    45  	hashkey[2] = rand.Uint64()
    46  	hashkey[3] = rand.Uint64()
    47  }
    48  
    49  const (
    50  	m1 = 0xa0761d6478bd642f
    51  	m2 = 0xe7037ed1a0b428db
    52  	m3 = 0x8ebc6af09c88c6e3
    53  	m4 = 0x589965cc75374cc3
    54  	m5 = 0x1d8e4e27c47d124f
    55  )
    56  
    57  func wyhash(p unsafe.Pointer, seed, s uint64) uint64 {
    58  	var a, b uint64
    59  	seed ^= hashkey[0] ^ m1
    60  	switch {
    61  	case s == 0:
    62  		return seed
    63  	case s < 4:
    64  		a = uint64(*(*byte)(p))
    65  		a |= uint64(*(*byte)(unsafe.Add(p, s>>1))) << 8
    66  		a |= uint64(*(*byte)(unsafe.Add(p, s-1))) << 16
    67  	case s == 4:
    68  		a = r4(p)
    69  		b = a
    70  	case s < 8:
    71  		a = r4(p)
    72  		b = r4(unsafe.Add(p, s-4))
    73  	case s == 8:
    74  		a = r8(p)
    75  		b = a
    76  	case s <= 16:
    77  		a = r8(p)
    78  		b = r8(unsafe.Add(p, s-8))
    79  	default:
    80  		l := s
    81  		if l > 48 {
    82  			seed1 := seed
    83  			seed2 := seed
    84  			for ; l > 48; l -= 48 {
    85  				seed = mix(r8(p)^m2, r8(unsafe.Add(p, 8))^seed)
    86  				seed1 = mix(r8(unsafe.Add(p, 16))^m3, r8(unsafe.Add(p, 24))^seed1)
    87  				seed2 = mix(r8(unsafe.Add(p, 32))^m4, r8(unsafe.Add(p, 40))^seed2)
    88  				p = unsafe.Add(p, 48)
    89  			}
    90  			seed ^= seed1 ^ seed2
    91  		}
    92  		for ; l > 16; l -= 16 {
    93  			seed = mix(r8(p)^m2, r8(unsafe.Add(p, 8))^seed)
    94  			p = unsafe.Add(p, 16)
    95  		}
    96  		a = r8(unsafe.Add(p, l-16))
    97  		b = r8(unsafe.Add(p, l-8))
    98  	}
    99  
   100  	return mix(m5^s, mix(a^m2, b^seed))
   101  }
   102  
   103  func wyhash64(x, seed uint64) uint64 {
   104  	return mix(m5^8, mix(x^m2, x^seed^hashkey[0]^m1))
   105  }
   106  
   107  func mix(a, b uint64) uint64 {
   108  	hi, lo := bits.Mul64(uint64(a), uint64(b))
   109  	return uint64(hi ^ lo)
   110  }
   111  
   112  func r4(p unsafe.Pointer) uint64 {
   113  	return uint64(*(*uint32)(p))
   114  }
   115  
   116  func r8(p unsafe.Pointer) uint64 {
   117  	return *(*uint64)(p)
   118  }
   119  
   120  func wyhashInt64Batch(data unsafe.Pointer, hashes *uint64, length int) {
   121  	dataSlice := unsafe.Slice((*uint64)(data), length)
   122  	hashSlice := unsafe.Slice(hashes, length)
   123  
   124  	for i := 0; i < length; i++ {
   125  		hashSlice[i] = wyhash64(dataSlice[i], randseed)
   126  	}
   127  }
   128  
   129  func wyhashInt64CellBatch(data unsafe.Pointer, hashes *uint64, length int) {
   130  	dataSlice := unsafe.Slice((*Int64HashMapCell)(data), length)
   131  	hashSlice := unsafe.Slice(hashes, length)
   132  	for i := 0; i < length; i++ {
   133  		hashSlice[i] = wyhash64(dataSlice[i].Key, randseed)
   134  	}
   135  }
   136  
   137  func wyhashBytesBatch(data *[]byte, states *[3]uint64, length int) {
   138  	dataSlice := unsafe.Slice((*[]byte)(data), length)
   139  	hashSlice := unsafe.Slice((*[3]uint64)(states), length)
   140  	for i := 0; i < length; i++ {
   141  		hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, uint64(len(dataSlice[i])))
   142  		hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, uint64(len(dataSlice[i])))
   143  		hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, uint64(len(dataSlice[i])))
   144  	}
   145  }
   146  
   147  func wyhashInt192Batch(data *[3]uint64, states *[3]uint64, length int) {
   148  	dataSlice := unsafe.Slice((*[3]uint64)(data), length)
   149  	hashSlice := unsafe.Slice((*[3]uint64)(states), length)
   150  	for i := 0; i < length; i++ {
   151  		hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 24)
   152  		hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 24)
   153  		hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 24)
   154  	}
   155  }
   156  
   157  func wyhashInt256Batch(data *[4]uint64, states *[3]uint64, length int) {
   158  	dataSlice := unsafe.Slice((*[4]uint64)(data), length)
   159  	hashSlice := unsafe.Slice((*[3]uint64)(states), length)
   160  	for i := 0; i < length; i++ {
   161  		hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 32)
   162  		hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 32)
   163  		hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 32)
   164  	}
   165  }
   166  
   167  func wyhashInt320Batch(data *[5]uint64, states *[3]uint64, length int) {
   168  	dataSlice := unsafe.Slice((*[5]uint64)(data), length)
   169  	hashSlice := unsafe.Slice((*[3]uint64)(states), length)
   170  	for i := 0; i < length; i++ {
   171  		hashSlice[i][0] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed, 40)
   172  		hashSlice[i][1] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed<<32, 40)
   173  		hashSlice[i][2] = wyhash(unsafe.Pointer(&dataSlice[i][0]), randseed>>32, 40)
   174  	}
   175  }