github.com/scottcagno/storage@v1.8.0/pkg/hash/xxhash/xxhash32.go

github.com/scottcagno/storage@v1.8.0/pkg/hash/xxhash/xxhash32.go (about)

     1  /*
     2   * // Copyright (c) 2021. Scott Cagno. All rights reserved.
     3   * // The license can be found in the root of this project; see LICENSE.
     4   */
     5  
     6  package xxhash
     7  
     8  import "hash"
     9  
    10  const (
    11  	prime32_1 = 2654435761
    12  	prime32_2 = 2246822519
    13  	prime32_3 = 3266489917
    14  	prime32_4 = 668265263
    15  	prime32_5 = 374761393
    16  )
    17  
    18  type xxHash32 struct {
    19  	seed     uint32
    20  	v1       uint32
    21  	v2       uint32
    22  	v3       uint32
    23  	v4       uint32
    24  	totalLen uint64
    25  	buf      [16]byte
    26  	bufused  int
    27  }
    28  
    29  // New returns a new Hash32 instance.
    30  func NewHash32(seed uint32) hash.Hash32 {
    31  	xxh := &xxHash32{seed: seed}
    32  	xxh.Reset()
    33  	return xxh
    34  }
    35  
    36  func Sum32(b []byte) uint32 {
    37  	h := NewHash32(0xCAFE)
    38  	h.Write(b)
    39  	return h.Sum32()
    40  }
    41  
    42  // Sum appends the current hash to b and returns the resulting slice.
    43  // It does not change the underlying hash state.
    44  func (xxh xxHash32) Sum(b []byte) []byte {
    45  	h32 := xxh.Sum32()
    46  	return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24))
    47  }
    48  
    49  // Reset resets the Hash to its initial state.
    50  func (xxh *xxHash32) Reset() {
    51  	xxh.v1 = xxh.seed + prime32_1 + prime32_2
    52  	xxh.v2 = xxh.seed + prime32_2
    53  	xxh.v3 = xxh.seed
    54  	xxh.v4 = xxh.seed - prime32_1
    55  	xxh.totalLen = 0
    56  	xxh.bufused = 0
    57  }
    58  
    59  // Size returns the number of bytes returned by Sum().
    60  func (xxh *xxHash32) Size() int {
    61  	return 4
    62  }
    63  
    64  // BlockSize gives the minimum number of bytes accepted by Write().
    65  func (xxh *xxHash32) BlockSize() int {
    66  	return 1
    67  }
    68  
    69  // Write adds input bytes to the Hash.
    70  // It never returns an error.
    71  func (xxh *xxHash32) Write(input []byte) (int, error) {
    72  	n := len(input)
    73  	m := xxh.bufused
    74  
    75  	xxh.totalLen += uint64(n)
    76  
    77  	r := len(xxh.buf) - m
    78  	if n < r {
    79  		copy(xxh.buf[m:], input)
    80  		xxh.bufused += len(input)
    81  		return n, nil
    82  	}
    83  
    84  	p := 0
    85  	if m > 0 {
    86  		// some data left from previous update
    87  		copy(xxh.buf[xxh.bufused:], input[:r])
    88  		xxh.bufused += len(input) - r
    89  
    90  		// fast rotl(13)
    91  		xxh.v1 = u32_rol13(xxh.v1+u32_u32(xxh.buf[:])*prime32_2) * prime32_1
    92  		xxh.v2 = u32_rol13(xxh.v2+u32_u32(xxh.buf[4:])*prime32_2) * prime32_1
    93  		xxh.v3 = u32_rol13(xxh.v3+u32_u32(xxh.buf[8:])*prime32_2) * prime32_1
    94  		xxh.v4 = u32_rol13(xxh.v4+u32_u32(xxh.buf[12:])*prime32_2) * prime32_1
    95  		p = r
    96  		xxh.bufused = 0
    97  	}
    98  
    99  	// Causes compiler to work directly from registers instead of stack:
   100  	v1, v2, v3, v4 := xxh.v1, xxh.v2, xxh.v3, xxh.v4
   101  	for n := n - 16; p <= n; p += 16 {
   102  		sub := input[p:][:16] //BCE hint for compiler
   103  		v1 = u32_rol13(v1+u32_u32(sub[:])*prime32_2) * prime32_1
   104  		v2 = u32_rol13(v2+u32_u32(sub[4:])*prime32_2) * prime32_1
   105  		v3 = u32_rol13(v3+u32_u32(sub[8:])*prime32_2) * prime32_1
   106  		v4 = u32_rol13(v4+u32_u32(sub[12:])*prime32_2) * prime32_1
   107  	}
   108  	xxh.v1, xxh.v2, xxh.v3, xxh.v4 = v1, v2, v3, v4
   109  
   110  	copy(xxh.buf[xxh.bufused:], input[p:])
   111  	xxh.bufused += len(input) - p
   112  
   113  	return n, nil
   114  }
   115  
   116  // Sum32 returns the 32 bits Hash value.
   117  func (xxh *xxHash32) Sum32() uint32 {
   118  	h32 := uint32(xxh.totalLen)
   119  	if xxh.totalLen >= 16 {
   120  		h32 += u32_rol1(xxh.v1) + u32_rol7(xxh.v2) + u32_rol12(xxh.v3) + u32_rol18(xxh.v4)
   121  	} else {
   122  		h32 += xxh.seed + prime32_5
   123  	}
   124  
   125  	p := 0
   126  	n := xxh.bufused
   127  	for n := n - 4; p <= n; p += 4 {
   128  		h32 += u32_u32(xxh.buf[p:p+4]) * prime32_3
   129  		h32 = u32_rol17(h32) * prime32_4
   130  	}
   131  	for ; p < n; p++ {
   132  		h32 += uint32(xxh.buf[p]) * prime32_5
   133  		h32 = u32_rol11(h32) * prime32_1
   134  	}
   135  
   136  	h32 ^= h32 >> 15
   137  	h32 *= prime32_2
   138  	h32 ^= h32 >> 13
   139  	h32 *= prime32_3
   140  	h32 ^= h32 >> 16
   141  
   142  	return h32
   143  }
   144  
   145  // Checksum returns the 32bits Hash value.
   146  func Checksum32(input []byte, seed uint32) uint32 {
   147  	n := len(input)
   148  	h32 := uint32(n)
   149  
   150  	if n < 16 {
   151  		h32 += seed + prime32_5
   152  	} else {
   153  		v1 := seed + prime32_1 + prime32_2
   154  		v2 := seed + prime32_2
   155  		v3 := seed
   156  		v4 := seed - prime32_1
   157  		p := 0
   158  		for n := n - 16; p <= n; p += 16 {
   159  			sub := input[p:][:16] //BCE hint for compiler
   160  			v1 = u32_rol13(v1+u32_u32(sub[:])*prime32_2) * prime32_1
   161  			v2 = u32_rol13(v2+u32_u32(sub[4:])*prime32_2) * prime32_1
   162  			v3 = u32_rol13(v3+u32_u32(sub[8:])*prime32_2) * prime32_1
   163  			v4 = u32_rol13(v4+u32_u32(sub[12:])*prime32_2) * prime32_1
   164  		}
   165  		input = input[p:]
   166  		n -= p
   167  		h32 += u32_rol1(v1) + u32_rol7(v2) + u32_rol12(v3) + u32_rol18(v4)
   168  	}
   169  
   170  	p := 0
   171  	for n := n - 4; p <= n; p += 4 {
   172  		h32 += u32_u32(input[p:p+4]) * prime32_3
   173  		h32 = u32_rol17(h32) * prime32_4
   174  	}
   175  	for p < n {
   176  		h32 += uint32(input[p]) * prime32_5
   177  		h32 = u32_rol11(h32) * prime32_1
   178  		p++
   179  	}
   180  
   181  	h32 ^= h32 >> 15
   182  	h32 *= prime32_2
   183  	h32 ^= h32 >> 13
   184  	h32 *= prime32_3
   185  	h32 ^= h32 >> 16
   186  
   187  	return h32
   188  }
   189  
   190  func u32_u32(buf []byte) uint32 {
   191  	// go compiler recognizes this pattern and optimizes it on little endian platforms
   192  	return uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
   193  }
   194  
   195  func u32_rol1(u uint32) uint32 {
   196  	return u<<1 | u>>31
   197  }
   198  
   199  func u32_rol7(u uint32) uint32 {
   200  	return u<<7 | u>>25
   201  }
   202  
   203  func u32_rol11(u uint32) uint32 {
   204  	return u<<11 | u>>21
   205  }
   206  
   207  func u32_rol12(u uint32) uint32 {
   208  	return u<<12 | u>>20
   209  }
   210  
   211  func u32_rol13(u uint32) uint32 {
   212  	return u<<13 | u>>19
   213  }
   214  
   215  func u32_rol17(u uint32) uint32 {
   216  	return u<<17 | u>>15
   217  }
   218  
   219  func u32_rol18(u uint32) uint32 {
   220  	return u<<18 | u>>14
   221  }