github.com/scottcagno/storage@v1.8.0/pkg/hash/xxhash/xxhash64.go (about)

     1  /*
     2   * // Copyright (c) 2021. Scott Cagno. All rights reserved.
     3   * // The license can be found in the root of this project; see LICENSE.
     4   */
     5  
     6  package xxhash
     7  
     8  import "hash"
     9  
    10  const (
    11  	prime64_1 = 11400714785074694791
    12  	prime64_2 = 14029467366897019727
    13  	prime64_3 = 1609587929392839161
    14  	prime64_4 = 9650029242287828579
    15  	prime64_5 = 2870177450012600261
    16  )
    17  
    18  type xxHash64 struct {
    19  	seed     uint64
    20  	v1       uint64
    21  	v2       uint64
    22  	v3       uint64
    23  	v4       uint64
    24  	totalLen uint64
    25  	buf      [32]byte
    26  	bufused  int
    27  }
    28  
    29  // New returns a new Hash64 instance.
    30  func NewHash64(seed uint64) hash.Hash64 {
    31  	xxh := &xxHash64{seed: seed}
    32  	xxh.Reset()
    33  	return xxh
    34  }
    35  
    36  func Sum64(b []byte) uint64 {
    37  	h := NewHash64(0xCAFE)
    38  	h.Write(b)
    39  	return h.Sum64()
    40  }
    41  
    42  // Sum appends the current hash to b and returns the resulting slice.
    43  // It does not change the underlying hash state.
    44  func (xxh xxHash64) Sum(b []byte) []byte {
    45  	h64 := xxh.Sum64()
    46  	return append(b, byte(h64), byte(h64>>8), byte(h64>>16), byte(h64>>24), byte(h64>>32), byte(h64>>40), byte(h64>>48), byte(h64>>56))
    47  }
    48  
    49  // Reset resets the Hash to its initial state.
    50  func (xxh *xxHash64) Reset() {
    51  	xxh.v1 = xxh.seed + prime64_1 + prime64_2
    52  	xxh.v2 = xxh.seed + prime64_2
    53  	xxh.v3 = xxh.seed
    54  	xxh.v4 = xxh.seed - prime64_1
    55  	xxh.totalLen = 0
    56  	xxh.bufused = 0
    57  }
    58  
    59  // Size returns the number of bytes returned by Sum().
    60  func (xxh *xxHash64) Size() int {
    61  	return 8
    62  }
    63  
    64  // BlockSize gives the minimum number of bytes accepted by Write().
    65  func (xxh *xxHash64) BlockSize() int {
    66  	return 1
    67  }
    68  
    69  // Write adds input bytes to the Hash.
    70  // It never returns an error.
    71  func (xxh *xxHash64) Write(input []byte) (int, error) {
    72  	n := len(input)
    73  	m := xxh.bufused
    74  
    75  	xxh.totalLen += uint64(n)
    76  
    77  	r := len(xxh.buf) - m
    78  	if n < r {
    79  		copy(xxh.buf[m:], input)
    80  		xxh.bufused += len(input)
    81  		return n, nil
    82  	}
    83  
    84  	p := 0
    85  	if m > 0 {
    86  		// some data left from previous update
    87  		copy(xxh.buf[xxh.bufused:], input[:r])
    88  		xxh.bufused += len(input) - r
    89  
    90  		// fast rotl(31)
    91  		xxh.v1 = u64_rol31(xxh.v1+u64_u64(xxh.buf[:])*prime64_2) * prime64_1
    92  		xxh.v2 = u64_rol31(xxh.v2+u64_u64(xxh.buf[8:])*prime64_2) * prime64_1
    93  		xxh.v3 = u64_rol31(xxh.v3+u64_u64(xxh.buf[16:])*prime64_2) * prime64_1
    94  		xxh.v4 = u64_rol31(xxh.v4+u64_u64(xxh.buf[24:])*prime64_2) * prime64_1
    95  		p = r
    96  		xxh.bufused = 0
    97  	}
    98  
    99  	// Causes compiler to work directly from registers instead of stack:
   100  	v1, v2, v3, v4 := xxh.v1, xxh.v2, xxh.v3, xxh.v4
   101  	for n := n - 32; p <= n; p += 32 {
   102  		sub := input[p:][:32] //BCE hint for compiler
   103  		v1 = u64_rol31(v1+u64_u64(sub[:])*prime64_2) * prime64_1
   104  		v2 = u64_rol31(v2+u64_u64(sub[8:])*prime64_2) * prime64_1
   105  		v3 = u64_rol31(v3+u64_u64(sub[16:])*prime64_2) * prime64_1
   106  		v4 = u64_rol31(v4+u64_u64(sub[24:])*prime64_2) * prime64_1
   107  	}
   108  	xxh.v1, xxh.v2, xxh.v3, xxh.v4 = v1, v2, v3, v4
   109  
   110  	copy(xxh.buf[xxh.bufused:], input[p:])
   111  	xxh.bufused += len(input) - p
   112  
   113  	return n, nil
   114  }
   115  
   116  // Sum64 returns the 64bits Hash value.
   117  func (xxh *xxHash64) Sum64() uint64 {
   118  	var h64 uint64
   119  	if xxh.totalLen >= 32 {
   120  		h64 = u64_rol1(xxh.v1) + u64_rol7(xxh.v2) + u64_rol12(xxh.v3) + u64_rol18(xxh.v4)
   121  
   122  		xxh.v1 *= prime64_2
   123  		xxh.v2 *= prime64_2
   124  		xxh.v3 *= prime64_2
   125  		xxh.v4 *= prime64_2
   126  
   127  		h64 = (h64^(u64_rol31(xxh.v1)*prime64_1))*prime64_1 + prime64_4
   128  		h64 = (h64^(u64_rol31(xxh.v2)*prime64_1))*prime64_1 + prime64_4
   129  		h64 = (h64^(u64_rol31(xxh.v3)*prime64_1))*prime64_1 + prime64_4
   130  		h64 = (h64^(u64_rol31(xxh.v4)*prime64_1))*prime64_1 + prime64_4
   131  
   132  		h64 += xxh.totalLen
   133  	} else {
   134  		h64 = xxh.seed + prime64_5 + xxh.totalLen
   135  	}
   136  
   137  	p := 0
   138  	n := xxh.bufused
   139  	for n := n - 8; p <= n; p += 8 {
   140  		h64 ^= u64_rol31(u64_u64(xxh.buf[p:p+8])*prime64_2) * prime64_1
   141  		h64 = u64_rol27(h64)*prime64_1 + prime64_4
   142  	}
   143  	if p+4 <= n {
   144  		sub := xxh.buf[p : p+4]
   145  		h64 ^= uint64(u64_u32(sub)) * prime64_1
   146  		h64 = u64_rol23(h64)*prime64_2 + prime64_3
   147  		p += 4
   148  	}
   149  	for ; p < n; p++ {
   150  		h64 ^= uint64(xxh.buf[p]) * prime64_5
   151  		h64 = u64_rol11(h64) * prime64_1
   152  	}
   153  
   154  	h64 ^= h64 >> 33
   155  	h64 *= prime64_2
   156  	h64 ^= h64 >> 29
   157  	h64 *= prime64_3
   158  	h64 ^= h64 >> 32
   159  
   160  	return h64
   161  }
   162  
   163  // Checksum returns the 64bits Hash value.
   164  func Checksum(input []byte, seed uint64) uint64 {
   165  	n := len(input)
   166  	var h64 uint64
   167  
   168  	if n >= 32 {
   169  		v1 := seed + prime64_1 + prime64_2
   170  		v2 := seed + prime64_2
   171  		v3 := seed
   172  		v4 := seed - prime64_1
   173  		p := 0
   174  		for n := n - 32; p <= n; p += 32 {
   175  			sub := input[p:][:32] //BCE hint for compiler
   176  			v1 = u64_rol31(v1+u64_u64(sub[:])*prime64_2) * prime64_1
   177  			v2 = u64_rol31(v2+u64_u64(sub[8:])*prime64_2) * prime64_1
   178  			v3 = u64_rol31(v3+u64_u64(sub[16:])*prime64_2) * prime64_1
   179  			v4 = u64_rol31(v4+u64_u64(sub[24:])*prime64_2) * prime64_1
   180  		}
   181  
   182  		h64 = u64_rol1(v1) + u64_rol7(v2) + u64_rol12(v3) + u64_rol18(v4)
   183  
   184  		v1 *= prime64_2
   185  		v2 *= prime64_2
   186  		v3 *= prime64_2
   187  		v4 *= prime64_2
   188  
   189  		h64 = (h64^(u64_rol31(v1)*prime64_1))*prime64_1 + prime64_4
   190  		h64 = (h64^(u64_rol31(v2)*prime64_1))*prime64_1 + prime64_4
   191  		h64 = (h64^(u64_rol31(v3)*prime64_1))*prime64_1 + prime64_4
   192  		h64 = (h64^(u64_rol31(v4)*prime64_1))*prime64_1 + prime64_4
   193  
   194  		h64 += uint64(n)
   195  
   196  		input = input[p:]
   197  		n -= p
   198  	} else {
   199  		h64 = seed + prime64_5 + uint64(n)
   200  	}
   201  
   202  	p := 0
   203  	for n := n - 8; p <= n; p += 8 {
   204  		sub := input[p : p+8]
   205  		h64 ^= u64_rol31(u64_u64(sub)*prime64_2) * prime64_1
   206  		h64 = u64_rol27(h64)*prime64_1 + prime64_4
   207  	}
   208  	if p+4 <= n {
   209  		sub := input[p : p+4]
   210  		h64 ^= uint64(u64_u32(sub)) * prime64_1
   211  		h64 = u64_rol23(h64)*prime64_2 + prime64_3
   212  		p += 4
   213  	}
   214  	for ; p < n; p++ {
   215  		h64 ^= uint64(input[p]) * prime64_5
   216  		h64 = u64_rol11(h64) * prime64_1
   217  	}
   218  
   219  	h64 ^= h64 >> 33
   220  	h64 *= prime64_2
   221  	h64 ^= h64 >> 29
   222  	h64 *= prime64_3
   223  	h64 ^= h64 >> 32
   224  
   225  	return h64
   226  }
   227  
   228  func u64_u64(buf []byte) uint64 {
   229  	// go compiler recognizes this pattern and optimizes it on little endian platforms
   230  	return uint64(buf[0]) | uint64(buf[1])<<8 | uint64(buf[2])<<16 | uint64(buf[3])<<24 | uint64(buf[4])<<32 | uint64(buf[5])<<40 | uint64(buf[6])<<48 | uint64(buf[7])<<56
   231  }
   232  
   233  func u64_u32(buf []byte) uint32 {
   234  	return uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
   235  }
   236  
   237  func u64_rol1(u uint64) uint64 {
   238  	return u<<1 | u>>63
   239  }
   240  
   241  func u64_rol7(u uint64) uint64 {
   242  	return u<<7 | u>>57
   243  }
   244  
   245  func u64_rol11(u uint64) uint64 {
   246  	return u<<11 | u>>53
   247  }
   248  
   249  func u64_rol12(u uint64) uint64 {
   250  	return u<<12 | u>>52
   251  }
   252  
   253  func u64_rol18(u uint64) uint64 {
   254  	return u<<18 | u>>46
   255  }
   256  
   257  func u64_rol23(u uint64) uint64 {
   258  	return u<<23 | u>>41
   259  }
   260  
   261  func u64_rol27(u uint64) uint64 {
   262  	return u<<27 | u>>37
   263  }
   264  func u64_rol31(u uint64) uint64 {
   265  	return u<<31 | u>>33
   266  }