github.com/bytedance/gopkg@v0.0.0-20240514070511-01b2cbcf35e1/util/xxhash3/internal/xxh3_raw/xxh3_raw.go (about)

     1  // Copyright 2021 ByteDance Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package xxh3_raw
    16  
    17  import (
    18  	"math/bits"
    19  	"unsafe"
    20  
    21  	"github.com/bytedance/gopkg/internal/runtimex"
    22  )
    23  
// Algorithm constants shared by the 64-bit and 128-bit variants.
const (
	// _stripe is the number of input bytes consumed per accumulation
	// round; _block is the number of bytes processed before the
	// accumulators are scrambled (16 stripes of 64 bytes).
	_stripe = 64
	_block  = 1024

	// Multiplicative primes inherited from XXH32.
	prime32_1 = 2654435761
	prime32_2 = 2246822519
	prime32_3 = 3266489917

	// Multiplicative primes inherited from XXH64.
	prime64_1 = 11400714785074694791
	prime64_2 = 14029467366897019727
	prime64_3 = 1609587929392839161
	prime64_4 = 9650029242287828579
	prime64_5 = 2870177450012600261
)
    38  
// xsecret points at the 192-byte default secret of XXH3. The hash
// routines below read 4- and 8-byte words from fixed — frequently
// unaligned — byte offsets into this table, so it is kept as an
// unsafe.Pointer rather than a typed array reference. The byte values
// must not be changed: they are part of the hash definition.
var xsecret = unsafe.Pointer(&[192]uint8{
	/* 	0 	*/ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
	/* 	16 	*/ 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
	/* 	32 	*/ 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
	/* 	48 	*/ 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
	/* 	64 	*/ 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
	/* 	80 	*/ 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
	/* 	96 	*/ 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
	/* 	112	*/ 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
	/* 	128	*/ 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
	/* 	144	*/ 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
	/* 	160	*/ 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
	/* 	176	*/ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
})
    53  
    54  // Hash returns the hash value of the byte slice in 64bits.
    55  func Hash(data []byte) uint64 {
    56  	length := uint64(len(data))
    57  	xinput := *(*unsafe.Pointer)(unsafe.Pointer(&data))
    58  
    59  	if length > 240 {
    60  		return hashLarge(xinput, length)
    61  	} else if length > 128 {
    62  		return xxh3Len129To240_64b(xinput, length)
    63  	} else if length > 16 {
    64  		return xxh3Len17To128_64b(xinput, length)
    65  	} else {
    66  		return xxh3Len0To16_64b(xinput, length)
    67  	}
    68  }
    69  
    70  // HashString returns the hash value of the string in 64bits.
    71  func HashString(s string) uint64 {
    72  	return Hash([]byte(s))
    73  }
    74  
    75  // Hash128 returns the hash value of the byte slice in 128bits.
    76  func Hash128(data []byte) [2]uint64 {
    77  	length := uint64(len(data))
    78  	xinput := *(*unsafe.Pointer)(unsafe.Pointer(&data))
    79  
    80  	if length > 240 {
    81  		return hashLarge128(xinput, length)
    82  	} else if length > 128 {
    83  		return xxh3Len129To240_128b(xinput, length)
    84  	} else if length > 16 {
    85  		return xxh3Len17To128_128b(xinput, length)
    86  	} else {
    87  		return xxh3Len0To16_128b(xinput, length)
    88  	}
    89  }
    90  
    91  // Hash128String returns the hash value of the string in 128bits.
    92  func Hash128String(s string) [2]uint64 {
    93  	return Hash128([]byte(s))
    94  }
    95  
// xxh3Len0To16_64b hashes inputs of 0..16 bytes. Each length band
// (9..16, 4..8, 1..3, empty) uses its own short-input mixer, keyed
// with words from different offsets of the default secret.
// len is the input length in bytes (shadows the builtin here).
func xxh3Len0To16_64b(xinput unsafe.Pointer, len uint64) uint64 {
	length := uintptr(len)

	if length > 8 {
		// 9..16 bytes: mix the (possibly overlapping) first and last
		// 8-byte words, keyed with secret words at offsets 24..55.
		inputlo := runtimex.ReadUnaligned64(xinput) ^ (runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+24)) ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32)))
		inputhi := runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-8)) ^ (runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret) + 40))) ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+48))
		acc := len + bits.ReverseBytes64(inputlo) + inputhi + mix(inputlo, inputhi)
		return xxh3Avalanche(acc)
	} else if length >= 4 {
		// 4..8 bytes: pack the first and last 4-byte words into one
		// 64-bit value, key it with secret words at offsets 8..23.
		input1 := runtimex.ReadUnaligned32(xinput)
		input2 := runtimex.ReadUnaligned32(unsafe.Pointer(uintptr(xinput) + length - 4))
		input64 := input2 + input1<<32
		keyed := input64 ^ (runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret) + 8))) ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16))
		return xxh3RRMXMX(keyed, len)
	} else if length > 0 {
		// 1..3 bytes: combine first, middle and last byte plus the
		// length into a single word. Only indices 0, len>>1 and len-1
		// are read, so the *[4]byte view never dereferences past the
		// actual input.
		q := (*[4]byte)(xinput)
		combined := (uint64(q[0]) << 16) | (uint64(q[len>>1]) << 24) | (uint64(q[len-1]) << 0) | len<<8
		combined ^= runtimex.ReadUnaligned32(xsecret) ^ runtimex.ReadUnaligned32(unsafe.Pointer(uintptr(xsecret)+4))
		return xxh64Avalanche(combined)
	} else {
		// Empty input: the result depends only on secret words 56..71.
		return xxh64Avalanche(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+56)) ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+64)))
	}
}
   119  
// xxh3Len17To128_64b hashes inputs of 17..128 bytes. Depending on the
// length, 1..4 pairs of 16-byte chunks are mixed into the accumulator:
// each pair takes one chunk from the front and the mirrored chunk from
// the back, so the whole buffer is always covered (chunks may overlap
// when the length is not a multiple of 32).
func xxh3Len17To128_64b(xinput unsafe.Pointer, len uint64) uint64 {
	length := uintptr(len)

	acc := len * prime64_1
	if length > 32 {
		if length > 64 {
			if length > 96 {
				// Bytes 48..63 from the front, 64..49 from the back.
				acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+48))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+96)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+56))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+104)))
				acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-64))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+112)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-56))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+120)))
			}
			// Bytes 32..47 from the front, 48..33 from the back.
			acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+64)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+40))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+72)))
			acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-48))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+80)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-40))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+88)))
		}
		// Bytes 16..31 from the front, 32..17 from the back.
		acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+24))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+40)))
		acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-32))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+48)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-24))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+56)))
	}
	// First 16 bytes and last 16 bytes (always present for len >= 17).
	acc += mix(runtimex.ReadUnaligned64(xinput)^runtimex.ReadUnaligned64(xsecret), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+8)))
	acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-16))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+24)))

	return xxh3Avalanche(acc)
}
   141  
// xxh3Len129To240_64b hashes inputs of 129..240 bytes. The first eight
// 16-byte chunks use the secret at offsets 16*i directly; remaining
// full chunks use the secret at 16*i-125 (i.e. 16*(i-8)+3, the
// mid-size start offset of the reference implementation); the final
// 16 bytes always use secret offset 119.
func xxh3Len129To240_64b(xinput unsafe.Pointer, len uint64) uint64 {
	length := uintptr(len)

	acc := len * prime64_1

	// First 128 bytes: eight unrolled 16-byte mixes.
	acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*0))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*0)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*0+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*0+8)))
	acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*1))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*1)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*1+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*1+8)))
	acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*2))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*2)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*2+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*2+8)))
	acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*3))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*3)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*3+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*3+8)))
	acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*4))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*4)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*4+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*4+8)))
	acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*5))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*5)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*5+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*5+8)))
	acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*6))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*6)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*6+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*6+8)))
	acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*7))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*7)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*7+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*7+8)))

	// Intermediate avalanche before the variable-length tail, as in
	// the reference algorithm.
	acc = xxh3Avalanche(acc)
	nbRounds := uintptr(length >> 4)

	// Remaining full 16-byte chunks (indices 8..nbRounds-1).
	for i := uintptr(8); i < nbRounds; i++ {
		acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*i))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*i-125)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16*i+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16*i-117)))
	}

	// Last 16 bytes (may overlap the loop above).
	acc += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-16))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+119)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+127)))

	return xxh3Avalanche(acc)
}
   167  
// hashLarge hashes inputs longer than 240 bytes: it runs the stripe
// accumulator over the whole input, then folds the eight 64-bit lanes
// into one value using secret words starting at byte offset 11 (the
// merge offset defined by the XXH3 specification).
func hashLarge(p unsafe.Pointer, length uint64) (acc uint64) {
	acc = length * prime64_1

	// Initial lane values prescribed by XXH3.
	xacc := [8]uint64{
		prime32_3, prime64_1, prime64_2, prime64_3,
		prime64_4, prime32_2, prime64_5, prime32_1}

	accumScalar(&xacc, p, xsecret, length)
	//merge xacc
	acc += mix(xacc[0]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+11)), xacc[1]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+19)))
	acc += mix(xacc[2]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+27)), xacc[3]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+35)))
	acc += mix(xacc[4]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+43)), xacc[5]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+51)))
	acc += mix(xacc[6]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+59)), xacc[7]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+67)))

	return xxh3Avalanche(acc)
}
   184  
   185  func xxh3Len0To16_128b(xinput unsafe.Pointer, len uint64) [2]uint64 {
   186  	length := uintptr(len)
   187  
   188  	if length > 8 {
   189  		bitflipl := runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32)) ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+40))
   190  		bitfliph := runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+48)) ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+56))
   191  		inputLow := runtimex.ReadUnaligned64(xinput)
   192  		inputHigh := runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput) + length - 8))
   193  		m128High64, m128Low64 := bits.Mul64(inputLow^inputHigh^bitflipl, prime64_1)
   194  
   195  		m128Low64 += uint64(length-1) << 54
   196  		inputHigh ^= bitfliph
   197  
   198  		m128High64 += inputHigh + uint64(uint32(inputHigh))*(prime32_2-1)
   199  		m128Low64 ^= bits.ReverseBytes64(m128High64)
   200  
   201  		h128High64, h128Low64 := bits.Mul64(m128Low64, prime64_2)
   202  		h128High64 += m128High64 * prime64_2
   203  
   204  		h128Low64 = xxh3Avalanche(h128Low64)
   205  		h128High64 = xxh3Avalanche(h128High64)
   206  
   207  		return [2]uint64{h128High64, h128Low64}
   208  
   209  	} else if length >= 4 {
   210  		inputLow := runtimex.ReadUnaligned32(xinput)
   211  		inputHigh := runtimex.ReadUnaligned32(unsafe.Pointer(uintptr(xinput) + uintptr(length-4)))
   212  		input64 := inputLow + (uint64(inputHigh) << 32)
   213  		bitflip := runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16)) ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+24))
   214  		keyed := input64 ^ bitflip
   215  
   216  		m128High64, m128Low64 := bits.Mul64(keyed, prime64_1+(len)<<2)
   217  		m128High64 += m128Low64 << 1
   218  		m128Low64 ^= m128High64 >> 3
   219  
   220  		m128Low64 ^= m128Low64 >> 35
   221  		m128Low64 *= 0x9fb21c651e98df25
   222  		m128Low64 ^= m128Low64 >> 28
   223  
   224  		m128High64 = xxh3Avalanche(m128High64)
   225  		return [2]uint64{m128High64, m128Low64}
   226  
   227  	} else if length >= 1 {
   228  		q := (*[4]byte)(xinput)
   229  		combinedl := (uint64(q[0]) << 16) | (uint64(q[len>>1]) << 24) | (uint64(q[len-1]) << 0) | len<<8
   230  		combinedh := uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(combinedl)), 13))
   231  
   232  		bitflipl := runtimex.ReadUnaligned32(xsecret) ^ runtimex.ReadUnaligned32(unsafe.Pointer(uintptr(xsecret)+4))
   233  		bitfliph := runtimex.ReadUnaligned32(unsafe.Pointer(uintptr(xsecret)+8)) ^ runtimex.ReadUnaligned32(unsafe.Pointer(uintptr(xsecret)+12))
   234  
   235  		keyedLow := combinedl ^ bitflipl
   236  		keyedHigh := combinedh ^ bitfliph
   237  
   238  		keyedLow = combinedl ^ bitflipl
   239  		keyedHigh = combinedh ^ bitfliph
   240  
   241  		h128Low64 := xxh64Avalanche(keyedLow)
   242  		h128High64 := xxh64Avalanche(keyedHigh)
   243  		return [2]uint64{h128High64, h128Low64}
   244  	}
   245  	bitflipl := runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+64)) ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+72))
   246  	bitfliph := runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+80)) ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+88))
   247  
   248  	h128High64 := xxh64Avalanche(bitfliph)
   249  	h128Low64 := xxh64Avalanche(bitflipl)
   250  
   251  	return [2]uint64{h128High64, h128Low64}
   252  }
   253  
// xxh3Len17To128_128b hashes inputs of 17..128 bytes into 128 bits,
// returned as {high64, low64}. Like the 64-bit variant it mixes
// mirrored front/back 16-byte chunk pairs, but keeps two accumulators:
// each chunk feeds a mix() into one accumulator and a raw sum XOR into
// the other, with front/back roles swapped between the two.
func xxh3Len17To128_128b(xinput unsafe.Pointer, len uint64) [2]uint64 {
	length := uintptr(len)

	accHigh := uint64(0)
	accLow := len * prime64_1

	if length > 32 {
		if length > 64 {
			if length > 96 {
				// Chunk pair: bytes 48..63 (front) and last 64..49 (back).
				accLow += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+48))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+96)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+56))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+104)))
				accLow ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-64)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-56))
				accHigh += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-64))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+112)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-56))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+120)))
				accHigh ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+48)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+56))
			}
			// Chunk pair: bytes 32..47 (front) and last 48..33 (back).
			accLow += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+64)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+40))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+72)))
			accLow ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-48)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-40))
			accHigh += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-48))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+80)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-40))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+88)))
			accHigh ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+40))
		}
		// Chunk pair: bytes 16..31 (front) and last 32..17 (back).
		// (3*8 == 24, written that way in the original.)
		accLow += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+3*8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+40)))
		accLow ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-32)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-3*8))
		accHigh += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-32))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+48)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-3*8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+56)))
		accHigh ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+16)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+3*8))
	}

	// First and last 16 bytes (always present for len >= 17).
	accLow += mix(runtimex.ReadUnaligned64(xinput)^runtimex.ReadUnaligned64(xsecret), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+8)))
	accLow ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-16)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-8))
	accHigh += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-16))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+16)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+24)))
	accHigh ^= runtimex.ReadUnaligned64(xinput) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+8))

	// Cross-fold the two accumulators into the final halves; the high
	// half is negated after the avalanche, per the reference algorithm.
	h128Low := accHigh + accLow
	h128High := (accLow * prime64_1) + (accHigh * prime64_4) + (len * prime64_2)

	h128Low = xxh3Avalanche(h128Low)
	h128High = -xxh3Avalanche(h128High)

	return [2]uint64{h128High, h128Low}
}
   292  
// xxh3Len129To240_128b hashes inputs of 129..240 bytes into 128 bits,
// returned as {high64, low64}. The input is consumed in 32-byte
// chunks: each chunk's first 16 bytes feed the low accumulator's mix
// and the high accumulator's XOR-sum, and vice versa for the second
// 16 bytes. The first four chunks use the secret directly; later
// chunks use shifted secret offsets (32*i-125 / 32*i-109, i.e. the
// reference's mid-size start offset of 3); the last 32 bytes always
// use secret offsets 103 and 119.
func xxh3Len129To240_128b(xinput unsafe.Pointer, len uint64) [2]uint64 {
	length := uintptr(len)
	// Number of complete 32-byte chunks; &^ and / share precedence and
	// associate left, so this is (length &^ 31) / 32 == length / 32.
	nbRounds := length &^ 31 / 32
	accLow64 := len * prime64_1
	accHigh64 := uint64(0)

	// First four chunks (bytes 0..127), secret used at natural offsets.
	for i := uintptr(0); i < 4; i++ {
		accLow64 += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32*i)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32*i+8)))
		accLow64 ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+16)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+24))
		accHigh64 += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+16))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32*i+16)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+24))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32*i+24)))
		accHigh64 ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+8))
	}

	// Intermediate avalanche before the variable-length tail.
	accLow64 = xxh3Avalanche(accLow64)
	accHigh64 = xxh3Avalanche(accHigh64)

	// Remaining complete chunks, secret at shifted offsets.
	for i := uintptr(4); i < nbRounds; i++ {
		accHigh64 += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+16))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32*i-109)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+24))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32*i-101)))
		accHigh64 ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+8))

		accLow64 += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32*i-125)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+32*i-117)))
		accLow64 ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+16)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+32*i+24))
	}

	// last 32 bytes
	accLow64 += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-16))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+103)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-8))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+111)))
	accLow64 ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-32)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-24))
	accHigh64 += mix(runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-32))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+119)), runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-24))^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+127)))
	accHigh64 ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-16)) + runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput)+length-8))

	// Cross-fold both accumulators into the final halves (simultaneous
	// assignment: both right-hand sides use the pre-update values).
	accHigh64, accLow64 = (accLow64*prime64_1)+(accHigh64*prime64_4)+(len*prime64_2), accHigh64+accLow64

	accLow64 = xxh3Avalanche(accLow64)
	// The high half is negated after the avalanche, per the reference.
	accHigh64 = -xxh3Avalanche(accHigh64)

	return [2]uint64{accHigh64, accLow64}
}
   330  
// hashLarge128 hashes inputs longer than 240 bytes into 128 bits,
// returned as {high64, low64} in acc[0]/acc[1]. It shares the stripe
// accumulator with the 64-bit path, then merges the eight lanes twice:
// once with secret offset 11 for the low half and once with offset 117
// (i.e. 192-64-11) for the high half.
func hashLarge128(p unsafe.Pointer, length uint64) (acc [2]uint64) {
	// Seed the two halves differently so they diverge from the start.
	acc[1] = length * prime64_1
	acc[0] = ^(length * prime64_2)

	// Initial lane values prescribed by XXH3.
	xacc := [8]uint64{
		prime32_3, prime64_1, prime64_2, prime64_3,
		prime64_4, prime32_2, prime64_5, prime32_1}

	accumScalar(&xacc, p, xsecret, length)
	// merge xacc
	acc[1] += mix(xacc[0]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+11)), xacc[1]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+19)))
	acc[1] += mix(xacc[2]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+27)), xacc[3]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+35)))
	acc[1] += mix(xacc[4]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+43)), xacc[5]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+51)))
	acc[1] += mix(xacc[6]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+59)), xacc[7]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+67)))

	acc[1] = xxh3Avalanche(acc[1])

	// High half: same lanes, secret taken from the tail of the table.
	acc[0] += mix(xacc[0]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+117)), xacc[1]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+125)))
	acc[0] += mix(xacc[2]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+133)), xacc[3]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+141)))
	acc[0] += mix(xacc[4]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+149)), xacc[5]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+157)))
	acc[0] += mix(xacc[6]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+165)), xacc[7]^runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret)+173)))
	acc[0] = xxh3Avalanche(acc[0])

	return acc
}
   356  
// accumScalar is the scalar (non-SIMD) long-input accumulation loop.
// It folds l bytes starting at xinput into the eight 64-bit lanes of
// xacc: each 64-byte stripe XORs input words with secret words and
// accumulates a 32x32->64 product per lane, the raw input word being
// added to the partner lane (j^1). After every full 1024-byte block
// the lanes are scrambled with secret bytes 128..191. The trailing
// partial block and a final (possibly overlapping) stripe ending at
// the last input byte are handled separately, the last stripe using
// secret offset 121 (== 192 - 64 - 7, per the reference algorithm).
//
// Note: the xsecret parameter shadows the package-level xsecret var;
// callers pass the package secret explicitly.
func accumScalar(xacc *[8]uint64, xinput, xsecret unsafe.Pointer, l uint64) {
	j := uint64(0)

	// Loops over block, process 16*8*8=1024 bytes of data each iteration
	for ; j < (l-1)/1024; j++ {
		k := xsecret
		for i := 0; i < 16; i++ {
			// Inner j (uintptr, lane index) shadows the outer block
			// counter j (uint64).
			for j := uintptr(0); j < 8; j++ {
				dataVec := runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput) + 8*j))
				keyVec := dataVec ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(k)+8*j))
				xacc[j^1] += dataVec
				xacc[j] += (keyVec & 0xffffffff) * (keyVec >> 32)
			}
			// Advance one stripe of input; the secret window slides by
			// only 8 bytes per stripe.
			xinput, k = unsafe.Pointer(uintptr(xinput)+_stripe), unsafe.Pointer(uintptr(k)+8)
		}

		// scramble xacc
		for j := uintptr(0); j < 8; j++ {
			xacc[j] ^= xacc[j] >> 47
			xacc[j] ^= runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xsecret) + 128 + 8*j))
			xacc[j] *= prime32_1
		}
	}
	l -= _block * j

	// last partial block (1024 bytes)
	if l > 0 {
		k := xsecret
		i := uint64(0)
		for ; i < (l-1)/_stripe; i++ {
			for j := uintptr(0); j < 8; j++ {
				dataVec := runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput) + 8*j))
				keyVec := dataVec ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(k)+8*j))
				xacc[j^1] += dataVec
				xacc[j] += (keyVec & 0xffffffff) * (keyVec >> 32)
			}
			xinput, k = unsafe.Pointer(uintptr(xinput)+_stripe), unsafe.Pointer(uintptr(k)+8)
		}
		l -= _stripe * i

		// last stripe (64 bytes)
		if l > 0 {
			// Rewind so the stripe ends exactly at the final input byte
			// (it may overlap bytes already processed above).
			xinput = unsafe.Pointer(uintptr(xinput) - uintptr(_stripe-l))
			k = unsafe.Pointer(uintptr(xsecret) + 121)

			for j := uintptr(0); j < 8; j++ {
				dataVec := runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(xinput) + 8*j))
				keyVec := dataVec ^ runtimex.ReadUnaligned64(unsafe.Pointer(uintptr(k)+8*j))
				xacc[j^1] += dataVec
				xacc[j] += (keyVec & 0xffffffff) * (keyVec >> 32)
			}
		}
	}
}
   411  
   412  func mix(a, b uint64) uint64 {
   413  	hi, lo := bits.Mul64(a, b)
   414  	return hi ^ lo
   415  }
   416  func xxh3RRMXMX(h64 uint64, length uint64) uint64 {
   417  	h64 ^= bits.RotateLeft64(h64, 49) ^ bits.RotateLeft64(h64, 24)
   418  	h64 *= 0x9fb21c651e98df25
   419  	h64 ^= (h64 >> 35) + length
   420  	h64 *= 0x9fb21c651e98df25
   421  	h64 ^= (h64 >> 28)
   422  	return h64
   423  }
   424  
   425  func xxh64Avalanche(h64 uint64) uint64 {
   426  	h64 ^= h64 >> 33
   427  	h64 *= prime64_2
   428  	h64 ^= h64 >> 29
   429  	h64 *= prime64_3
   430  	h64 ^= h64 >> 32
   431  	return h64
   432  }
   433  
   434  func xxh3Avalanche(x uint64) uint64 {
   435  	x ^= x >> 37
   436  	x *= 0x165667919e3779f9
   437  	x ^= x >> 32
   438  	return x
   439  }