github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/hash.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the golang.org/LICENSE file.

// Hashing algorithm inspired by
//   xxhash: https://code.google.com/p/xxhash/
// cityhash: https://code.google.com/p/cityhash/
// Most of the code in this file is copied from the Go runtime package. These
// are the hash functions used for Go maps.

package colexec

import (
	"math/rand"
	"unsafe"
)

const (
	ptrSize = 4 << (^uintptr(0) >> 63) // unsafe.Sizeof(uintptr(0)) but an ideal const
	c0      = uintptr((8-ptrSize)/4*2860486313 + (ptrSize-4)/4*33054211828000289)
	c1      = uintptr((8-ptrSize)/4*3267000013 + (ptrSize-4)/4*23344194077549503)
	// Constants for multiplication: four random odd 64-bit numbers.
	m1 = 16877499708836156737
	m2 = 2820277070424839065
	m3 = 9497967016996688599
	m4 = 15839092249703872147
)
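
// A worked reading of the constants above (illustration only): on a 64-bit
// platform ^uintptr(0) has all 64 bits set, so ^uintptr(0)>>63 == 1 and
// ptrSize == 4<<1 == 8. With ptrSize == 8, the factor (8-ptrSize)/4 is 0 and
// (ptrSize-4)/4 is 1, so c0 and c1 reduce to the 64-bit constants. On a
// 32-bit platform the shift yields 0, ptrSize == 4, and the 32-bit constants
// are selected instead, all at compile time.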

// hashKey is used to seed the hash function. The Go runtime seeds its key
// randomly at process startup; here it is fixed, so the hash of a given
// value is the same across runs (on a given platform).
var hashKey [4]uintptr

func init() {
	for i := range hashKey {
		hashKey[i] = 1
	}
}
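
// For illustration (not in the original file): with the fixed key above,
// identical bytes always hash to the same value, e.g.
//
//	b := []byte("roach")
//	h1 := memhash(unsafe.Pointer(&b[0]), 0, uintptr(len(b)))
//	h2 := memhash(unsafe.Pointer(&b[0]), 0, uintptr(len(b)))
//	// h1 == h2, within a run and across runs on the same platform.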

// readUnaligned32 reads a uint32 from p with no alignment requirement. A
// plain dereference is only safe on architectures that tolerate unaligned
// loads (e.g. amd64, arm64); the Go runtime supplies byte-by-byte variants
// for the rest. The result is in native byte order, which is fine for
// hashing.
func readUnaligned32(p unsafe.Pointer) uint32 {
	return *(*uint32)(p)
}

// readUnaligned64 is the 8-byte analogue of readUnaligned32.
func readUnaligned64(p unsafe.Pointer) uint64 {
	return *(*uint64)(p)
}

// add returns the pointer p advanced by x bytes.
// Should be a built-in for unsafe.Pointer?
//go:nosplit
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
	return unsafe.Pointer(uintptr(p) + x)
}

// This function is copied from the Go runtime.
// noescape hides a pointer from escape analysis. noescape is
// the identity function but escape analysis doesn't think the
// output depends on the input. noescape is inlined and currently
// compiles down to zero instructions.
// USE CAREFULLY!
//go:nosplit
func noescape(p unsafe.Pointer) unsafe.Pointer {
	x := uintptr(p)
	//lint:ignore SA4016 x ^ 0 is a no-op that fools escape analysis.
	return unsafe.Pointer(x ^ 0)
}
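
// A hypothetical use of noescape (illustration only, not from this file):
//
//	var v int64 = 42
//	h := memhash(noescape(unsafe.Pointer(&v)), 0, 8)
//
// Passing &v through a plain unsafe.Pointer parameter could make the
// compiler move v to the heap; noescape lets it stay on the stack.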

// memhash hashes the s bytes at p, mixed with seed. It is a copy of the Go
// runtime's 64-bit memhash.
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
	h := uint64(seed + s*hashKey[0])
tail:
	switch {
	case s == 0:
	case s < 4:
		// 1-3 bytes: mix the first, middle, and last byte. For s == 1 all
		// three reads hit the same byte.
		h ^= uint64(*(*byte)(p))
		h ^= uint64(*(*byte)(add(p, s>>1))) << 8
		h ^= uint64(*(*byte)(add(p, s-1))) << 16
		h = rotl31(h*m1) * m2
	case s <= 8:
		// 4-8 bytes: two (possibly overlapping) 4-byte reads cover the input.
		h ^= uint64(readUnaligned32(p))
		h ^= uint64(readUnaligned32(add(p, s-4))) << 32
		h = rotl31(h*m1) * m2
	case s <= 16:
		// 9-16 bytes: two (possibly overlapping) 8-byte reads.
		h ^= readUnaligned64(p)
		h = rotl31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-8))
		h = rotl31(h*m1) * m2
	case s <= 32:
		// 17-32 bytes: four 8-byte reads, the last two anchored to the end.
		h ^= readUnaligned64(p)
		h = rotl31(h*m1) * m2
		h ^= readUnaligned64(add(p, 8))
		h = rotl31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-16))
		h = rotl31(h*m1) * m2
		h ^= readUnaligned64(add(p, s-8))
		h = rotl31(h*m1) * m2
	default:
		// Longer inputs: run four lanes over 32-byte chunks, then fold the
		// lanes together and reenter the switch to hash the remaining tail.
		v1 := h
		v2 := uint64(seed * hashKey[1])
		v3 := uint64(seed * hashKey[2])
		v4 := uint64(seed * hashKey[3])
		for s >= 32 {
			v1 ^= readUnaligned64(p)
			v1 = rotl31(v1*m1) * m2
			p = add(p, 8)
			v2 ^= readUnaligned64(p)
			v2 = rotl31(v2*m2) * m3
			p = add(p, 8)
			v3 ^= readUnaligned64(p)
			v3 = rotl31(v3*m3) * m4
			p = add(p, 8)
			v4 ^= readUnaligned64(p)
			v4 = rotl31(v4*m4) * m1
			p = add(p, 8)
			s -= 32
		}
		h = v1 ^ v2 ^ v3 ^ v4
		goto tail
	}

	// Final avalanche.
	h ^= h >> 29
	h *= m3
	h ^= h >> 32
	return uintptr(h)
}
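
// hashBytes is a hypothetical wrapper (not part of the original file)
// showing how memhash would typically be called on a byte slice.
func hashBytes(b []byte, seed uintptr) uintptr {
	if len(b) == 0 {
		// memhash never dereferences p when s == 0.
		return memhash(nil, seed, 0)
	}
	return memhash(unsafe.Pointer(&b[0]), seed, uintptr(len(b)))
}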

// memhash64 is the fixed-size fast path for hashing exactly 8 bytes at p.
// It reads them as two 4-byte halves and applies a single mix round plus
// the final avalanche.
func memhash64(p unsafe.Pointer, seed uintptr) uintptr {
	h := uint64(seed + 8*hashKey[0])
	h ^= uint64(readUnaligned32(p)) | uint64(readUnaligned32(add(p, 4)))<<32
	h = rotl31(h*m1) * m2
	h ^= h >> 29
	h *= m3
	h ^= h >> 32
	return uintptr(h)
}
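
// Illustration (hypothetical): hashing a single 8-byte value, e.g. an int64
// column datum.
//
//	v := int64(42)
//	h := memhash64(noescape(unsafe.Pointer(&v)), 0)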

// Note: in order to get the compiler to issue rotl instructions, we
// need to constant fold the shift amount by hand.
// TODO: convince the compiler to issue rotl instructions after inlining.
func rotl31(x uint64) uint64 {
	return (x << 31) | (x >> (64 - 31))
}
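
// For reference, rotl31(x) computes the same value as
// bits.RotateLeft64(x, 31) from math/bits.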

// NOTE: Because NaN != NaN, a map can contain any
// number of (mostly useless) entries keyed with NaNs.
// To avoid long hash chains, we assign a random number
// as the hash value for a NaN.

func f64hash(p unsafe.Pointer, h uintptr) uintptr {
	f := *(*float64)(p)
	switch {
	case f == 0:
		return c1 * (c0 ^ h) // +0, -0
	case f != f:
		// TODO(asubiotto): fastrand relies on some stack internals.
		//return c1 * (c0 ^ h ^ uintptr(fastrand())) // any kind of NaN
		return c1 * (c0 ^ h ^ uintptr(rand.Uint32())) // any kind of NaN
	default:
		return memhash(p, h, 8)
	}
}
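
// Illustration (hypothetical): each NaN gets a fresh random hash, so NaN
// keys scatter across buckets instead of forming one long chain.
//
//	nan := math.NaN()
//	h1 := f64hash(noescape(unsafe.Pointer(&nan)), 0)
//	h2 := f64hash(noescape(unsafe.Pointer(&nan)), 0)
//	// h1 != h2 with high probability.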